// CuNNy 8x16C BILINEAR RGB NVL DN - https://github.com/funnyplanter/CuNNy

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// 
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
// 
// You should have received a copy of the GNU General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.

//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME CuNNy-DN-D16N08

// Input texture; pass 1 lists it under //!IN and samples it through the O() macro.
//!TEXTURE
Texture2D INPUT;

// Output texture, declared at twice the input width and twice the input height.
//!TEXTURE
//!WIDTH INPUT_WIDTH * 2
//!HEIGHT INPUT_HEIGHT * 2
Texture2D OUTPUT;

// Sampler declared with point filtering; the O() macro samples through it.
//!SAMPLER
//!FILTER POINT
SamplerState SP;

// Sampler declared with linear filtering.
//!SAMPLER
//!FILTER LINEAR
SamplerState SL;

//!COMMON
// O(t, p): sample texture `t` with sampler SP at mip level 0, at the current
// position shifted by `p` scaled by `pt`. The macro expands in the caller's
// scope and relies on `pos` and `pt` being defined there (Pass1 sets `pt` from
// GetInputPt() and `pos` to (gxy + 0.5) * pt). `p` is parenthesized so that a
// compound offset expression is scaled by `pt` as a whole instead of only its
// last term.
#define O(t, p) t.SampleLevel(SP, pos + (p) * pt, 0)
// Short aliases for the minimum-precision vector and matrix types used by the
// weight tables in every pass.
#define V4 min16float4
#define M4 min16float4x4

// Intermediate textures t0..t7. Each is declared at the input width and height
// with format R8G8B8A8_SNORM. Pass 1 lists t0..t3 under //!OUT; pass 2 lists
// t0..t3 under //!IN and t4..t7 under //!OUT.
//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
//!FORMAT R8G8B8A8_SNORM
Texture2D t0;

//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
//!FORMAT R8G8B8A8_SNORM
Texture2D t1;

//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
//!FORMAT R8G8B8A8_SNORM
Texture2D t2;

//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
//!FORMAT R8G8B8A8_SNORM
Texture2D t3;

//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
//!FORMAT R8G8B8A8_SNORM
Texture2D t4;

//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
//!FORMAT R8G8B8A8_SNORM
Texture2D t5;

//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
//!FORMAT R8G8B8A8_SNORM
Texture2D t6;

//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
//!FORMAT R8G8B8A8_SNORM
Texture2D t7;

// Pass 1 ("in"): reads INPUT and writes t0..t3, declared with BLOCK_SIZE 8 and
// NUM_THREADS 64.
//!PASS 1
//!DESC in
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN INPUT
//!OUT t0, t1, t2, t3

// l0(x, y): sample INPUT at offset (x, y) via O(), take the dot product of its
// rgb channels with a fixed weight vector, add the constant 2.427e-01 and narrow
// the result to a single min16float. Like O(), it needs `pos` and `pt` in scope.
#define l0(x, y) min16float((dot(float3(-1.941e-01, -3.865e-01, -8.377e-02), O(INPUT, float2(x, y)).rgb) + 2.427e-01))

// Pass-1 kernel whose result Pass1 stores in t0.
// Takes nine scalar taps s0_0..s0_8, scales a constant 4-component weight
// vector by each, sums the products in tap order and finally adds a bias.
V4 f0(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) {
	// Accumulate strictly in tap order so the rounding sequence stays unchanged.
	V4 acc = 0.0;
	acc = acc + s0_0 * V4(3.079e-02, -3.606e-03, 7.865e-02, -7.318e-03);
	acc = acc + s0_1 * V4(-8.047e-03, 1.049e-02, -8.037e-02, 1.078e-02);
	acc = acc + s0_2 * V4(2.018e-03, -5.329e-03, 8.914e-02, -2.185e-03);
	acc = acc + s0_3 * V4(2.427e-01, -1.099e-01, -4.625e-02, -3.691e-01);
	acc = acc + s0_4 * V4(-2.399e-01, 1.115e-01, -4.162e-02, 3.721e-01);
	acc = acc + s0_5 * V4(-1.869e-02, -1.139e-02, 3.210e-02, 2.956e-04);
	acc = acc + s0_6 * V4(1.169e-01, -2.234e-01, -1.335e-02, -1.347e-02);
	acc = acc + s0_7 * V4(-1.421e-01, 2.203e-01, -5.437e-02, -6.029e-04);
	acc = acc + s0_8 * V4(8.644e-03, 1.055e-02, -3.855e-04, 9.967e-03);
	// Bias is added last.
	acc = acc + V4(8.018e-03, 1.569e-02, 1.090e-02, -7.794e-05);
	return acc;
}

// Pass-1 kernel whose result Pass1 stores in t1.
// Takes nine scalar taps s0_0..s0_8, scales a constant 4-component weight
// vector by each, sums the products in tap order and finally adds a bias.
V4 f1(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) {
	// Accumulate strictly in tap order so the rounding sequence stays unchanged.
	V4 acc = 0.0;
	acc = acc + s0_0 * V4(7.324e-02, 1.679e-02, -2.775e-02, -4.778e-02);
	acc = acc + s0_1 * V4(2.549e-01, 4.843e-02, 5.584e-02, 7.739e-02);
	acc = acc + s0_2 * V4(1.709e-02, -8.911e-02, -4.865e-02, -1.851e-02);
	acc = acc + s0_3 * V4(-6.274e-02, -7.121e-02, 1.284e-01, 1.029e-01);
	acc = acc + s0_4 * V4(-2.921e-01, -1.194e-01, -6.886e-02, 2.502e-01);
	acc = acc + s0_5 * V4(-1.090e-02, -3.670e-02, 7.626e-02, 3.154e-02);
	acc = acc + s0_6 * V4(9.342e-03, 4.821e-02, -1.915e-02, -6.984e-02);
	acc = acc + s0_7 * V4(5.284e-03, 6.002e-02, -4.112e-02, 1.237e-01);
	acc = acc + s0_8 * V4(-5.183e-03, 1.393e-01, 8.273e-03, -1.178e-02);
	// Bias is added last.
	acc = acc + V4(1.318e-02, 1.522e-02, 4.021e-03, -8.088e-02);
	return acc;
}

// Pass-1 kernel whose result Pass1 stores in t2.
// Takes nine scalar taps s0_0..s0_8, scales a constant 4-component weight
// vector by each, sums the products in tap order and finally adds a bias.
V4 f2(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) {
	// Accumulate strictly in tap order so the rounding sequence stays unchanged.
	V4 acc = 0.0;
	acc = acc + s0_0 * V4(1.577e-01, 5.275e-03, -8.819e-02, -1.712e-02);
	acc = acc + s0_1 * V4(-1.033e-01, 1.714e-01, -2.117e-01, -2.830e-02);
	acc = acc + s0_2 * V4(-6.247e-02, -2.103e-02, -2.257e-03, 4.623e-02);
	acc = acc + s0_3 * V4(-2.031e-02, 7.706e-02, -6.031e-02, -3.312e-01);
	acc = acc + s0_4 * V4(-1.796e-01, 2.363e-01, 1.926e-02, 1.713e-01);
	acc = acc + s0_5 * V4(9.624e-02, 8.355e-02, 1.085e-01, 1.431e-01);
	acc = acc + s0_6 * V4(3.225e-04, -7.018e-02, 3.764e-02, -6.763e-02);
	acc = acc + s0_7 * V4(5.291e-02, -4.092e-01, 1.667e-01, 9.630e-02);
	acc = acc + s0_8 * V4(7.371e-03, -7.103e-02, 2.365e-02, -1.196e-02);
	// Bias is added last.
	acc = acc + V4(1.425e-02, -3.870e-03, -1.067e-02, 7.370e-03);
	return acc;
}

// Pass-1 kernel whose result Pass1 stores in t3.
// Takes nine scalar taps s0_0..s0_8, scales a constant 4-component weight
// vector by each, sums the products in tap order and finally adds a bias.
V4 f3(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) {
	// Accumulate strictly in tap order so the rounding sequence stays unchanged.
	V4 acc = 0.0;
	acc = acc + s0_0 * V4(2.076e-03, 1.123e-02, 5.660e-02, -2.469e-02);
	acc = acc + s0_1 * V4(8.450e-02, -8.344e-03, 3.408e-01, 1.068e-01);
	acc = acc + s0_2 * V4(-8.521e-02, -8.863e-02, -8.628e-03, 4.745e-02);
	acc = acc + s0_3 * V4(4.301e-02, 1.143e-02, -6.893e-02, -1.578e-01);
	acc = acc + s0_4 * V4(8.960e-02, 3.037e-01, -3.176e-01, 4.196e-02);
	acc = acc + s0_5 * V4(-1.172e-01, -2.426e-01, -8.905e-03, 2.253e-01);
	acc = acc + s0_6 * V4(-4.505e-02, -7.804e-03, 1.783e-02, -8.501e-02);
	acc = acc + s0_7 * V4(-1.606e-01, -1.452e-02, -1.241e-02, -1.597e-01);
	acc = acc + s0_8 * V4(1.909e-01, 3.577e-02, 6.756e-03, 1.028e-02);
	// Bias is added last.
	acc = acc + V4(-4.116e-03, 1.699e-02, -1.220e-02, -2.878e-03);
	return acc;
}

// Pass 1 entry point. Each invocation handles the single input texel selected
// by blockStart and tid.x: it evaluates l0() at that texel and at its eight
// surrounding offsets, then stores the f0..f3 results into t0..t3.
void Pass1(uint2 blockStart, uint3 tid) {
	// Texel handled by this invocation; nothing is written outside the input size.
	uint2 texel = blockStart + Rmp8x8(tid.x);
	uint2 inputSize = GetInputSize();
	if (any(texel >= inputSize)) {
		return;
	}

	// `pt` and `pos` must keep exactly these names: the O() macro behind l0()
	// refers to them. `pos` is the texel centre scaled by `pt`.
	float2 pt = float2(GetInputPt());
	float2 pos = (float2(texel) + 0.5) * pt;

	// Nine taps, x varying fastest from -1 to 1, then y from -1 to 1.
	min16float tap0 = l0(-1.0, -1.0);
	min16float tap1 = l0(0.0, -1.0);
	min16float tap2 = l0(1.0, -1.0);
	min16float tap3 = l0(-1.0, 0.0);
	min16float tap4 = l0(0.0, 0.0);
	min16float tap5 = l0(1.0, 0.0);
	min16float tap6 = l0(-1.0, 1.0);
	min16float tap7 = l0(0.0, 1.0);
	min16float tap8 = l0(1.0, 1.0);

	// One kernel per output texture, all fed the same nine taps.
	t0[texel] = f0(tap0, tap1, tap2, tap3, tap4, tap5, tap6, tap7, tap8);
	t1[texel] = f1(tap0, tap1, tap2, tap3, tap4, tap5, tap6, tap7, tap8);
	t2[texel] = f2(tap0, tap1, tap2, tap3, tap4, tap5, tap6, tap7, tap8);
	t3[texel] = f3(tap0, tap1, tap2, tap3, tap4, tap5, tap6, tap7, tap8);
}

// Pass 2 ("conv1"): reads t0..t3 and writes t4..t7, declared with BLOCK_SIZE 8
// and NUM_THREADS 64.
//!PASS 2
//!DESC conv1
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN t0, t1, t2, t3
//!OUT t4, t5, t6, t7

// l0..l3(x, y): sample t0..t3 respectively at offset (x, y) via O() and convert
// the sampled value to V4. Like O(), they need `pos` and `pt` in scope.
#define l0(x, y) V4(O(t0, float2(x, y)))
#define l1(x, y) V4(O(t1, float2(x, y)))
#define l2(x, y) V4(O(t2, float2(x, y)))
#define l3(x, y) V4(O(t3, float2(x, y)))

// Pass-2 kernel producing one 4-component result.
// Each of the 72 arguments sK_N (K = 0..7, N = 0..8) is a V4 that is multiplied,
// as the first operand of mul(), by its own constant 4x4 matrix; the products
// are summed in argument order and a constant bias vector is added at the end.
// NOTE(review): the caller lies outside the visible part of the file.
// Presumably K selects an input feature group and N a 3x3 neighbourhood tap in
// the same order as pass 1 -- confirm against Pass2.
V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) {
	V4 r = 0.0;
	// s0_* terms
	r += mul(s0_0, M4(2.048e-02, -4.829e-03, 1.116e-01, 1.424e-01, 4.054e-02, -1.403e-02, -1.093e-02, -4.677e-02, 1.253e-01, 2.540e-02, 1.268e-02, 7.303e-02, 6.252e-02, 1.872e-01, -8.308e-02, -1.259e-01));
	r += mul(s0_1, M4(-4.819e-02, 9.732e-02, 2.710e-03, 4.293e-02, -4.048e-02, -7.517e-02, -1.502e-01, -9.363e-02, -1.166e-01, -4.531e-02, 1.796e-01, 2.567e-02, -2.013e-01, 1.364e-01, -4.303e-02, -2.691e-02));
	r += mul(s0_2, M4(1.476e-01, 3.921e-02, 1.519e-01, 5.807e-02, 3.850e-02, -2.336e-01, -2.001e-01, 6.151e-03, 5.383e-02, 6.078e-02, -1.792e-01, -1.423e-01, 8.391e-02, 2.738e-01, 1.784e-01, -7.098e-02));
	r += mul(s0_3, M4(4.426e-04, -1.720e-01, -5.887e-02, 1.816e-02, 1.950e-02, 2.097e-01, -5.867e-02, 1.822e-02, -4.042e-03, -1.147e-02, -6.860e-02, 3.117e-02, 8.652e-02, 1.681e-01, -6.179e-02, -8.912e-02));
	r += mul(s0_4, M4(-2.212e-01, 2.744e-01, -3.287e-02, 1.385e-02, -2.971e-03, 2.905e-02, 4.532e-02, 4.762e-02, -2.109e-02, 5.882e-02, -4.547e-02, -2.030e-02, 1.673e-01, 6.945e-02, -3.483e-01, -4.171e-01));
	r += mul(s0_5, M4(-4.755e-02, 2.782e-01, 8.658e-02, -3.461e-01, 2.831e-02, 7.165e-03, 5.473e-02, 9.912e-02, -5.727e-02, 4.351e-02, -1.463e-01, 1.089e-01, -7.061e-02, 3.919e-01, 1.370e-01, 6.642e-02));
	r += mul(s0_6, M4(1.101e-01, 5.662e-02, -7.494e-02, -5.565e-03, 1.182e-01, -4.261e-02, 5.721e-02, 2.424e-02, 1.077e-01, 1.178e-02, 3.188e-02, -1.370e-02, 2.044e-02, 8.463e-02, -9.320e-02, 1.874e-02));
	r += mul(s0_7, M4(1.014e-02, 1.127e-01, 3.595e-02, -2.266e-01, -9.668e-02, 3.770e-02, 1.041e-01, 2.042e-02, -9.701e-02, -2.058e-02, -4.635e-02, -1.055e-01, -2.100e-01, -1.200e-01, 1.076e-01, 5.583e-01));
	r += mul(s0_8, M4(1.595e-01, -1.046e-02, -1.695e-02, 2.403e-02, -3.490e-02, -1.954e-01, -1.325e-02, -1.459e-02, 1.135e-02, -1.278e-01, 3.800e-02, -4.423e-02, -8.423e-02, -4.600e-01, -3.075e-02, 2.350e-01));
	// s1_* terms
	r += mul(s1_0, M4(1.528e-02, -1.405e-02, -6.905e-02, -5.089e-02, 1.860e-02, -1.111e-02, 8.663e-02, -5.265e-02, 7.609e-02, 6.255e-02, -1.408e-01, 1.661e-01, -5.898e-02, -2.493e-01, 5.922e-02, -4.831e-02));
	r += mul(s1_1, M4(2.318e-01, -1.965e-01, -1.191e-02, 9.833e-03, 1.083e-01, 4.304e-01, 1.066e-01, -1.447e-01, 1.249e-01, 7.425e-02, 4.746e-02, 2.711e-01, -5.051e-02, -1.087e-01, -2.176e-02, 2.572e-01));
	r += mul(s1_2, M4(7.448e-02, -1.738e-01, -1.739e-01, 8.456e-02, 2.159e-01, 4.333e-01, -1.027e-01, -3.007e-01, -4.220e-02, 2.373e-01, -1.648e-01, -5.700e-02, -4.307e-02, -2.232e-01, -7.669e-02, 1.161e-01));
	r += mul(s1_3, M4(2.425e-03, 7.625e-02, -5.265e-02, 6.556e-02, 7.800e-02, -5.827e-02, -7.143e-02, -1.148e-01, -8.064e-02, 1.301e-02, -1.257e-01, 1.360e-01, -5.066e-02, -1.077e-01, 1.705e-02, 5.452e-02));
	r += mul(s1_4, M4(2.348e-01, -1.722e-01, 7.330e-02, 5.327e-01, 1.861e-01, 1.807e-01, -3.935e-03, 9.360e-02, 6.946e-03, -2.022e-02, 1.355e-01, -1.639e-01, 4.145e-01, -1.285e-01, -1.717e-01, -5.645e-01));
	r += mul(s1_5, M4(1.669e-01, -3.700e-01, -2.197e-01, 2.970e-01, -3.760e-01, 8.613e-02, 1.677e-02, -1.083e-01, 6.986e-02, 2.914e-01, 1.835e-01, -3.219e-02, -3.564e-01, -4.215e-02, 1.380e-01, 2.394e-01));
	r += mul(s1_6, M4(6.843e-02, 5.395e-02, -3.803e-02, 1.370e-01, -1.090e-02, -1.215e-01, 1.153e-02, -4.736e-02, -6.143e-02, -1.871e-01, 2.793e-01, 1.721e-01, -8.387e-03, -7.740e-03, 1.398e-01, -3.166e-02));
	r += mul(s1_7, M4(-5.026e-02, -1.521e-02, -4.912e-02, -1.959e-01, 2.314e-02, -4.775e-02, -1.317e-01, 8.754e-02, 3.595e-02, 2.588e-01, -1.679e-01, -1.083e-01, -3.263e-02, 2.192e-03, 1.870e-01, 3.609e-01));
	r += mul(s1_8, M4(1.043e-01, 1.864e-01, -3.583e-01, 5.765e-02, -7.832e-02, -4.087e-02, -3.872e-02, 1.526e-01, -1.825e-01, -1.559e-01, -9.546e-02, 3.329e-01, 1.625e-01, -1.244e-03, -8.165e-02, -2.106e-01));
	// s2_* terms
	r += mul(s2_0, M4(4.537e-02, 8.269e-02, -7.632e-02, -2.650e-02, -5.914e-02, -2.708e-02, -5.644e-02, -1.955e-02, 4.075e-01, 1.023e-01, 2.545e-01, -3.611e-02, -4.924e-01, 2.777e-01, 2.361e-01, 8.845e-02));
	r += mul(s2_1, M4(1.618e-02, -4.293e-02, -2.222e-01, 9.574e-02, 5.111e-02, -7.128e-02, -8.863e-02, -9.884e-02, 2.190e-01, 2.622e-01, 4.013e-01, -2.096e-03, -5.959e-01, -5.596e-01, 2.032e-01, -5.020e-01));
	r += mul(s2_2, M4(-2.909e-02, -4.977e-02, -1.716e-01, 7.754e-02, -3.726e-02, -6.215e-02, -2.769e-02, 4.118e-02, 3.981e-01, -9.391e-02, 3.138e-01, 6.588e-03, -5.657e-02, 3.557e-01, 5.208e-02, 1.875e-01));
	r += mul(s2_3, M4(-1.199e-02, 3.765e-02, 2.132e-01, 1.092e-01, 7.937e-02, 8.123e-02, 3.154e-02, 1.558e-01, -3.848e-01, 4.161e-02, -4.289e-01, 2.959e-01, -3.045e-01, -1.637e-01, -1.188e-01, -2.289e-01));
	r += mul(s2_4, M4(-1.106e-01, -6.425e-02, -2.421e-05, 2.504e-01, -5.712e-02, -4.485e-02, 9.015e-02, 1.155e-01, -4.607e-01, 4.294e-01, -8.332e-02, 8.106e-03, -5.400e-01, -5.825e-01, -4.254e-01, -6.588e-01));
	r += mul(s2_5, M4(-6.044e-02, -1.510e-02, -1.153e-01, 1.034e-01, -3.919e-02, 1.693e-01, -9.993e-02, 9.887e-02, -4.767e-01, -5.224e-01, -2.356e-01, -2.719e-01, 2.220e-01, -5.790e-02, -2.353e-01, 3.756e-01));
	r += mul(s2_6, M4(-3.249e-02, 1.989e-01, -9.129e-02, -2.266e-01, 9.810e-02, 1.517e-02, -6.743e-02, -7.139e-02, 5.856e-02, -1.223e-01, 1.849e-01, -2.400e-01, -6.638e-01, -1.431e-01, 7.543e-02, 7.866e-02));
	r += mul(s2_7, M4(1.129e-01, -2.504e-01, -1.268e-01, -4.894e-01, -2.064e-02, -6.057e-02, 1.197e-01, -2.185e-01, -9.475e-02, -5.783e-03, 1.411e-01, 1.097e-02, -6.736e-01, 2.996e-01, 2.517e-02, 5.610e-01));
	r += mul(s2_8, M4(7.648e-02, -8.256e-02, -1.898e-03, -6.823e-02, -3.014e-02, 2.219e-02, -1.916e-02, 9.177e-02, -1.586e-01, 6.034e-02, -1.241e-01, 8.531e-02, -9.582e-02, 1.067e+00, -1.194e-01, 4.834e-01));
	// s3_* terms
	r += mul(s3_0, M4(2.375e-01, -4.091e-01, 1.832e-01, 1.283e-01, -6.383e-03, 7.594e-02, 2.752e-01, -1.586e-02, 3.441e-02, -5.528e-02, 6.215e-02, -5.897e-02, 6.831e-02, 1.216e-02, 4.011e-02, -3.186e-02));
	r += mul(s3_1, M4(6.683e-02, 2.236e-01, 3.607e-01, -7.426e-02, 2.785e-03, 1.162e-01, -2.118e-01, -1.945e-03, -1.978e-02, -3.249e-03, 1.293e-01, -1.713e-01, 1.218e-01, -1.215e-02, -1.414e-01, -6.262e-02));
	r += mul(s3_2, M4(-3.481e-02, 1.937e-01, 5.413e-02, -6.549e-02, -1.993e-02, -9.367e-02, -1.667e-01, -3.937e-02, 2.967e-02, -7.572e-02, -1.416e-01, 3.481e-02, 1.576e-01, 6.027e-02, -3.635e-03, -7.787e-02));
	r += mul(s3_3, M4(1.250e-01, -4.078e-01, 1.158e-01, -1.665e-02, -3.614e-03, -1.915e-01, 2.157e-01, -5.540e-01, -9.113e-02, 1.063e-01, 7.709e-02, 4.088e-02, 2.007e-01, -8.304e-02, -3.002e-02, -5.822e-02));
	r += mul(s3_4, M4(-4.140e-02, -8.432e-02, 2.778e-01, 2.684e-01, -2.473e-01, -6.407e-02, -1.096e-01, -1.436e-01, 7.597e-02, 3.470e-02, 1.170e-01, -8.363e-02, 1.282e-01, -5.355e-03, -3.813e-02, 2.595e-02));
	r += mul(s3_5, M4(8.598e-02, -1.880e-01, 1.063e-01, -3.625e-02, -1.067e-02, -1.114e-01, 4.480e-02, -7.637e-02, 1.680e-02, 1.438e-01, 8.682e-02, 5.675e-02, 1.712e-01, -1.034e-01, -5.088e-02, -3.905e-02));
	r += mul(s3_6, M4(1.316e-01, 2.414e-02, 1.710e-01, -1.727e-02, 8.122e-02, -1.222e-01, 1.751e-01, -1.420e-01, 2.528e-02, -6.473e-02, 1.683e-03, 4.588e-02, 8.350e-03, 1.496e-02, -4.137e-03, -2.057e-02));
	r += mul(s3_7, M4(2.034e-01, 5.782e-01, 3.159e-01, 3.472e-01, 1.365e-01, -2.080e-01, 3.101e-02, 1.994e-01, -5.950e-02, -1.722e-01, -6.752e-02, -7.090e-02, 4.903e-02, -3.817e-02, -5.804e-02, 7.004e-02));
	r += mul(s3_8, M4(8.316e-02, -5.188e-02, 2.791e-02, 2.633e-01, 8.128e-02, -6.263e-02, 4.355e-03, 1.811e-01, 1.124e-03, 1.370e-01, -1.033e-01, 9.403e-02, 4.089e-03, 1.682e-01, -1.840e-02, 1.889e-01));
	// s4_* terms
	r += mul(s4_0, M4(6.883e-02, -3.365e-02, -7.749e-02, -1.125e-02, -5.791e-02, 8.547e-02, -1.313e-01, 8.004e-02, 1.737e-01, -1.435e-01, 5.896e-02, 3.570e-01, 6.774e-03, -9.935e-02, -7.111e-02, -2.184e-02));
	r += mul(s4_1, M4(2.542e-02, 4.588e-02, -2.912e-02, 7.918e-02, -1.258e-01, 1.255e-01, 2.762e-01, 8.645e-02, 1.868e-01, -4.082e-02, 1.071e-01, -8.159e-02, 1.061e-01, -7.220e-02, 7.972e-02, -1.144e-01));
	r += mul(s4_2, M4(-3.943e-02, -2.530e-03, -1.421e-01, 1.234e-02, -6.156e-02, -5.961e-02, 1.371e-01, -8.217e-02, -1.241e-02, 2.655e-02, 1.023e-01, 2.034e-02, -2.526e-02, -2.962e-01, -1.047e-01, -5.024e-02));
	r += mul(s4_3, M4(-3.914e-02, -3.351e-02, -9.769e-02, 3.761e-02, -4.687e-02, 5.776e-03, -1.007e-01, -1.576e-02, 2.036e-02, 8.528e-02, -1.367e-01, -3.148e-02, 3.450e-02, 5.910e-02, -1.594e-01, 6.574e-02));
	r += mul(s4_4, M4(-1.158e-01, 9.737e-02, 4.901e-02, 9.995e-02, 3.487e-01, 2.029e-01, -1.692e-01, -3.025e-01, 2.957e-02, 1.805e-01, 2.607e-01, -2.922e-01, -3.176e-02, 7.012e-04, -6.591e-02, 1.762e-01));
	r += mul(s4_5, M4(1.198e-02, 1.014e-01, -1.126e-01, -2.023e-02, -7.713e-03, -3.943e-01, -3.786e-02, 1.089e-02, -1.520e-02, -1.209e-02, 1.198e-01, -3.087e-01, -4.675e-02, -1.251e-01, -1.260e-02, 1.825e-01));
	r += mul(s4_6, M4(-2.353e-03, 1.799e-02, -1.054e-01, -7.154e-02, 5.610e-02, -1.107e-01, -1.445e-01, -9.241e-02, -3.225e-02, -1.135e-01, -1.337e-01, -9.913e-02, -3.790e-02, -8.893e-03, 3.147e-02, 7.881e-02));
	r += mul(s4_7, M4(4.549e-02, 3.463e-03, -1.602e-01, -1.657e-01, -4.810e-02, -6.075e-02, 1.189e-01, 2.263e-01, 4.268e-02, 3.193e-02, 5.690e-02, -8.275e-02, -3.675e-02, 2.772e-02, 1.014e-01, 1.266e-01));
	r += mul(s4_8, M4(-2.574e-02, -6.567e-02, 5.514e-02, -2.007e-02, -6.985e-02, -3.602e-02, 1.411e-01, 2.116e-01, 2.192e-02, 2.803e-01, -4.727e-02, -1.305e-01, 7.359e-02, -2.774e-01, 1.381e-02, 2.945e-02));
	// s5_* terms
	r += mul(s5_0, M4(1.886e-01, -3.069e-01, 1.924e-01, 8.122e-05, 1.030e-01, 3.917e-01, 9.103e-02, 1.352e-02, -5.548e-03, -9.670e-02, -1.490e-01, 4.872e-02, 1.341e-01, 2.537e-01, 3.456e-02, 2.064e-01));
	r += mul(s5_1, M4(-7.483e-03, 1.570e-01, -3.750e-02, 7.532e-02, -1.204e-01, -7.274e-02, -1.715e-01, -6.946e-02, -8.478e-03, 2.234e-02, -6.538e-02, 8.187e-02, -4.276e-02, 3.250e-01, 7.092e-02, -1.634e-01));
	r += mul(s5_2, M4(-2.400e-01, -6.757e-02, -2.895e-01, -4.304e-02, -6.639e-02, 2.374e-01, 2.146e-02, -2.000e-01, -1.112e-01, 3.496e-02, 1.347e-01, -1.487e-02, -1.085e-01, 4.659e-01, 1.224e-01, -8.265e-02));
	r += mul(s5_3, M4(3.145e-01, 1.658e-02, -2.276e-02, -1.438e-01, -3.178e-02, 3.935e-02, -3.544e-02, -1.899e-01, -5.814e-02, -1.307e-01, -1.382e-01, -3.227e-02, 1.437e-02, 1.111e-01, -1.035e-01, -7.885e-02));
	r += mul(s5_4, M4(-3.229e-01, -9.307e-02, 3.023e-01, 6.295e-03, 2.145e-01, -1.877e-01, -1.613e-01, -6.503e-01, -8.287e-03, 1.163e-01, 9.040e-02, 3.824e-01, 7.898e-02, 3.488e-02, 5.911e-02, -2.907e-01));
	r += mul(s5_5, M4(1.529e-01, -3.209e-01, 4.762e-04, -1.543e-01, -1.263e-01, 1.533e-01, -8.658e-02, -1.062e-01, 1.029e-01, 1.971e-01, 3.683e-03, -6.256e-02, 3.022e-02, 3.967e-01, 3.126e-01, 1.873e-01));
	r += mul(s5_6, M4(2.686e-01, -4.941e-01, -1.025e-01, -1.662e-01, 1.417e-01, 1.175e-01, -1.813e-01, -2.293e-01, -1.127e-01, -6.958e-02, -9.037e-02, 1.713e-01, 1.146e-01, -1.494e-01, -1.107e-01, 9.983e-02));
	r += mul(s5_7, M4(-2.660e-01, 1.089e-02, 5.858e-02, 2.201e-01, -1.121e-02, -2.135e-01, -2.460e-02, -1.489e-01, -2.915e-02, 1.928e-01, 7.983e-02, 3.235e-01, -1.740e-01, -7.737e-02, 1.455e-03, -3.482e-02));
	r += mul(s5_8, M4(-1.029e-02, 3.737e-01, 1.250e-01, 3.295e-02, -7.525e-02, 3.134e-01, -1.192e-01, 1.571e-01, -4.867e-02, 7.209e-02, -2.612e-02, 4.915e-02, 1.351e-01, -4.645e-02, -9.808e-02, -3.062e-02));
	// s6_* terms
	r += mul(s6_0, M4(7.136e-02, 3.892e-01, -2.039e-02, -9.361e-02, -3.301e-02, 2.963e-02, -1.642e-02, -4.920e-03, -1.372e-01, -1.734e-01, -4.992e-02, -3.279e-02, 2.520e-02, 1.099e-01, 8.215e-02, 8.987e-02));
	r += mul(s6_1, M4(-3.586e-02, 5.550e-01, -1.931e-02, 5.454e-02, -1.024e-01, -1.373e-03, -1.027e-01, -1.894e-02, 2.436e-02, -7.983e-02, 1.831e-01, -7.232e-02, -9.584e-02, 2.270e-01, 4.128e-01, -1.151e-01));
	r += mul(s6_2, M4(9.899e-03, -8.969e-02, -1.024e-01, -9.267e-02, -5.787e-02, -5.934e-03, 3.101e-02, -8.005e-03, 1.361e-02, 5.110e-02, 1.814e-01, 9.458e-03, -5.395e-02, -1.132e-01, 8.022e-02, -9.348e-02));
	r += mul(s6_3, M4(-1.691e-02, 1.228e-01, -4.414e-01, -4.493e-02, 6.157e-02, 1.725e-01, 1.978e-01, 9.639e-02, -2.200e-01, 1.519e-02, -6.653e-02, -1.240e-01, 1.104e-01, -2.867e-01, -1.772e-01, -1.694e-01));
	r += mul(s6_4, M4(4.175e-03, -3.042e-03, -4.069e-01, -1.799e-01, 7.398e-02, 1.979e-01, -2.934e-01, -3.110e-01, -2.453e-01, 3.449e-01, 2.932e-03, 4.631e-01, -5.239e-02, 4.868e-02, -1.146e-01, -3.701e-01));
	r += mul(s6_5, M4(-1.675e-02, -3.687e-01, 5.851e-02, -9.250e-02, 4.892e-02, -1.813e-01, 4.720e-03, 3.314e-02, -1.723e-01, -1.135e-01, 3.627e-02, 5.550e-02, 6.245e-02, 3.006e-01, 1.394e-01, 1.003e-01));
	r += mul(s6_6, M4(-3.407e-02, -2.386e-01, 5.349e-02, -2.292e-01, 9.524e-02, 4.638e-02, -7.525e-02, -2.798e-02, 1.035e-01, 1.299e-01, 3.415e-01, -1.354e-01, 7.460e-02, -1.440e-01, 1.353e-01, 3.060e-03));
	r += mul(s6_7, M4(-1.258e-01, 2.745e-02, 1.231e-01, 1.713e-01, -2.098e-02, 7.189e-02, -1.986e-01, 2.369e-01, 1.856e-01, 4.211e-01, 6.151e-03, 6.073e-01, -3.834e-02, -2.427e-01, 5.039e-02, 1.161e-02));
	r += mul(s6_8, M4(-1.989e-02, -4.339e-03, 2.842e-01, -1.031e-02, 1.369e-02, -1.014e-01, -6.263e-02, 1.122e-01, -1.563e-01, -6.126e-01, 1.238e-01, 5.685e-02, 4.562e-02, -1.745e-01, 1.086e-01, -1.466e-01));
	// s7_* terms
	r += mul(s7_0, M4(-4.805e-02, -1.401e-01, 6.923e-02, 1.697e-01, -2.523e-01, 8.848e-02, 1.435e-01, -2.233e-01, -5.380e-02, -6.852e-02, -1.428e-01, 2.790e-03, -2.441e-02, -2.829e-02, -5.574e-02, 1.381e-01));
	r += mul(s7_1, M4(1.177e-01, -7.392e-02, 1.024e-02, -1.602e-01, 7.005e-02, -5.573e-02, 2.307e-01, -7.645e-02, -3.209e-02, -7.073e-02, -1.727e-01, -1.063e-02, 6.527e-02, -3.432e-01, -4.096e-02, 7.950e-03));
	r += mul(s7_2, M4(-8.284e-02, 1.449e-01, -1.798e-02, -3.799e-02, -1.531e-03, -2.513e-01, -9.600e-02, -4.397e-02, -2.352e-02, 7.292e-03, -6.193e-02, 7.588e-02, -8.963e-02, 9.885e-04, -1.486e-01, 5.317e-02));
	r += mul(s7_3, M4(-6.310e-02, -2.401e-01, 4.052e-02, 1.000e-01, -8.855e-02, -2.369e-01, 2.686e-01, -7.257e-02, 2.088e-02, -2.243e-01, 3.193e-03, 1.760e-01, -6.365e-02, 9.996e-02, 2.837e-03, -8.671e-02));
	r += mul(s7_4, M4(8.164e-02, -4.756e-02, 2.309e-02, -2.026e-01, 4.132e-01, 4.186e-01, -2.936e-01, -2.088e-01, 5.347e-02, -2.918e-01, 2.646e-01, 9.397e-02, 4.247e-02, -6.289e-02, 7.642e-02, -2.434e-01));
	r += mul(s7_5, M4(-3.417e-02, 3.304e-02, -2.885e-02, 1.089e-02, -1.627e-02, 1.389e-01, 1.373e-01, 2.479e-02, 3.377e-02, 1.498e-01, 7.101e-02, 4.019e-02, 7.393e-03, 2.319e-01, -2.269e-02, 1.284e-01));
	r += mul(s7_6, M4(1.941e-02, 8.722e-02, -2.539e-02, 4.058e-02, -8.321e-02, -1.395e-01, -1.337e-01, 6.340e-02, 1.466e-02, 2.007e-01, 4.412e-02, -9.243e-02, -1.015e-01, 1.885e-01, -4.207e-02, 1.015e-01));
	r += mul(s7_7, M4(3.785e-02, 4.694e-02, 7.932e-02, 2.261e-02, 1.802e-01, -3.702e-01, 2.005e-02, 4.594e-03, 8.157e-02, -9.254e-02, 8.060e-02, -4.165e-01, 6.297e-02, 5.093e-02, 4.032e-03, -2.049e-02));
	r += mul(s7_8, M4(9.597e-03, 1.507e-01, -4.030e-02, 1.059e-01, 4.280e-02, -1.442e-01, -7.376e-02, -1.243e-01, -7.370e-02, 1.650e-01, 1.268e-01, -2.555e-01, 3.424e-02, -5.523e-02, -6.058e-02, -1.255e-01));
	// Constant bias, added after all matrix products.
	r += V4(2.168e-01, -7.048e-03, 9.668e-03, -7.673e-03);
	return r;
}

V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) {
	V4 r = 0.0;
	r += mul(s0_0, M4(2.536e-02, -1.410e-02, 1.647e-02, 3.362e-02, -5.931e-02, 5.242e-02, 8.443e-02, 1.832e-02, -6.297e-02, 1.131e-02, 1.886e-03, 7.406e-02, 2.468e-01, 3.622e-02, 9.487e-02, -1.406e-01));
	r += mul(s0_1, M4(1.708e-02, 5.549e-02, 8.309e-02, -3.319e-01, -8.135e-02, -6.274e-02, 4.542e-02, -1.088e-02, -6.131e-05, -6.698e-02, 2.377e-01, 1.074e-02, 2.328e-01, -5.083e-02, 5.312e-02, 3.142e-02));
	r += mul(s0_2, M4(2.108e-01, -2.520e-01, -1.814e-01, -2.618e-01, 1.928e-01, 1.105e-01, 3.550e-01, 2.058e-01, 3.686e-02, -1.207e-01, -3.100e-02, -3.347e-03, -2.387e-02, 1.090e-01, -5.270e-02, -1.661e-01));
	r += mul(s0_3, M4(-1.782e-01, 9.148e-02, 1.673e-02, 1.584e-01, 9.863e-02, -6.084e-02, -1.283e-01, 4.255e-02, 3.218e-02, -1.173e-01, -2.996e-02, -7.470e-02, -1.136e-01, -6.690e-02, 1.829e-02, -2.640e-01));
	r += mul(s0_4, M4(-2.231e-01, -6.642e-02, -1.165e-01, 4.834e-01, 1.864e-01, 1.146e-01, 6.213e-02, -1.358e-01, -2.288e-02, -9.995e-02, -1.871e-02, -3.403e-02, 4.270e-02, -1.442e-01, 2.548e-01, -3.048e-01));
	r += mul(s0_5, M4(6.320e-02, 5.797e-02, 1.611e-01, -1.258e-01, 5.108e-02, -6.501e-02, -2.157e-01, -5.767e-02, -4.806e-03, 5.627e-02, 8.263e-02, 6.277e-02, 1.150e-01, 1.506e-01, 3.555e-02, -2.758e-01));
	r += mul(s0_6, M4(6.143e-02, -1.360e-01, 6.878e-02, 1.956e-02, 4.439e-02, -6.323e-02, -5.701e-02, -3.591e-02, -3.441e-02, 3.028e-02, -9.030e-02, -2.044e-01, -1.354e-01, -1.885e-01, 6.433e-02, -8.726e-02));
	r += mul(s0_7, M4(2.577e-02, 1.404e-01, -5.843e-02, 9.891e-02, 4.416e-02, -1.072e-01, -2.792e-03, -2.544e-02, 8.820e-02, 1.357e-01, -5.828e-02, 1.665e-01, 8.421e-02, 7.829e-02, -1.086e-02, 1.252e-01));
	r += mul(s0_8, M4(1.450e-01, 4.198e-02, -3.822e-03, 9.691e-02, 2.170e-02, 1.878e-01, -1.555e-01, 1.012e-01, -2.978e-02, 7.988e-02, -5.955e-02, 6.559e-02, -1.048e-01, -2.250e-01, 6.121e-02, -1.443e-01));
	r += mul(s1_0, M4(1.716e-01, 1.011e-01, 8.449e-02, -1.361e-01, 7.292e-02, -2.243e-01, 9.731e-02, -8.865e-02, -2.096e-01, -4.386e-01, -1.018e-01, -1.794e-01, -1.590e-02, 1.191e-01, -1.300e-01, 7.400e-02));
	r += mul(s1_1, M4(-2.492e-01, 1.453e-01, 2.022e-01, 4.891e-02, -7.163e-02, -1.665e-01, 2.042e-02, 5.348e-01, 3.951e-01, 4.514e-01, 1.276e-01, 1.216e-01, 3.184e-02, 6.502e-02, -2.052e-01, 1.890e-01));
	r += mul(s1_2, M4(3.835e-02, -5.284e-02, -2.494e-01, 2.411e-01, -5.676e-03, -1.005e-01, 2.276e-01, -7.575e-01, 1.925e-01, 4.742e-01, -4.667e-01, -1.748e-01, -4.170e-01, -1.441e-01, -3.124e-03, 3.889e-01));
	r += mul(s1_3, M4(1.985e-01, -1.217e-01, 6.853e-02, -6.425e-01, -1.093e-01, 3.058e-02, 1.008e-01, -1.000e-02, -4.686e-02, 3.577e-01, 5.252e-02, 2.789e-02, 1.321e-01, 2.989e-01, -7.806e-02, 1.051e-01));
	r += mul(s1_4, M4(2.250e-01, -4.267e-01, 7.196e-02, -8.018e-01, 1.341e-01, 1.303e-01, 1.809e-01, 6.113e-01, 1.488e-02, 8.244e-02, 9.509e-02, -6.869e-02, 3.005e-01, 2.894e-01, 1.626e-01, 5.481e-02));
	r += mul(s1_5, M4(3.079e-01, -2.656e-01, -4.390e-02, 1.123e-01, 3.129e-02, 2.423e-01, 1.855e-01, 6.808e-02, 1.320e-01, 2.064e-02, -2.904e-02, -1.324e-01, -8.232e-02, -1.363e-01, -5.007e-01, 3.797e-01));
	r += mul(s1_6, M4(5.187e-02, -1.303e-02, -1.266e-01, -1.807e-01, 1.205e-01, -7.534e-02, 2.407e-01, 3.149e-02, 8.110e-03, -3.276e-02, 8.138e-02, -5.819e-02, -6.869e-02, -1.575e-01, -4.518e-02, 4.960e-02));
	r += mul(s1_7, M4(7.838e-02, -1.572e-01, -8.077e-02, -5.178e-01, 1.536e-02, -1.072e-01, 1.329e-01, 3.574e-02, -1.702e-01, -2.425e-01, -1.537e-01, -3.181e-01, 1.056e-01, 3.161e-01, -1.665e-01, 6.447e-02));
	r += mul(s1_8, M4(-1.255e-01, -1.266e-01, -3.164e-02, -2.112e-01, -1.026e-01, 2.861e-01, 1.551e-02, -1.131e-02, 6.266e-02, -2.642e-01, -2.798e-01, 2.402e-01, -2.326e-01, 2.260e-01, -8.163e-02, 5.749e-02));
	r += mul(s2_0, M4(-8.472e-02, 3.525e-02, -3.044e-03, 8.102e-02, -1.510e-01, 2.814e-02, -2.020e-01, -2.593e-02, -1.469e-01, 2.096e-01, 1.935e-01, 1.393e-01, -3.591e-01, 4.131e-01, -7.662e-01, 2.411e-01));
	r += mul(s2_1, M4(3.957e-02, -6.504e-02, 3.046e-01, -8.791e-02, 2.511e-02, 1.664e-01, 2.942e-01, -9.583e-02, 1.185e-01, 6.388e-02, -1.285e-01, 2.930e-01, -3.716e-01, 9.728e-01, -2.521e-01, 5.220e-02));
	r += mul(s2_2, M4(7.598e-02, 9.158e-02, 2.824e-02, -6.247e-02, -1.126e-01, 8.460e-02, 5.086e-02, 1.760e-01, 3.303e-02, 2.893e-01, 3.190e-01, 6.632e-03, 6.721e-01, -1.753e-01, -2.229e-01, 2.627e-01));
	r += mul(s2_3, M4(1.787e-01, -1.077e-02, -5.198e-02, -7.528e-02, 1.769e-03, 1.579e-01, 1.184e-01, -8.013e-02, 6.146e-03, -2.568e-01, 7.215e-02, -1.016e-01, 3.469e-01, -3.423e-01, -6.154e-02, -1.618e-01));
	r += mul(s2_4, M4(1.202e-02, -1.281e-01, -1.330e-01, -1.741e-02, 9.794e-02, -1.963e-01, -8.935e-02, 8.661e-02, 3.888e-01, -8.260e-02, -1.119e-01, -1.267e-02, -1.810e-01, 7.144e-02, 5.592e-01, -2.022e-01));
	r += mul(s2_5, M4(-6.198e-02, -1.699e-03, 9.683e-02, -1.632e-02, 1.796e-03, 3.730e-02, -1.630e-01, -7.638e-04, 1.343e-01, -3.744e-02, -3.589e-01, -3.690e-02, 4.693e-01, -1.661e-01, -2.354e-01, -1.382e-03));
	r += mul(s2_6, M4(1.837e-02, -2.326e-02, -1.332e-01, -2.146e-02, 1.084e-01, -1.774e-01, 2.879e-03, 1.380e-02, 3.178e-02, 2.288e-01, 4.481e-02, 1.634e-01, -3.233e-01, -1.216e-01, 6.813e-02, 3.430e-02));
	r += mul(s2_7, M4(1.806e-01, -1.755e-01, 2.481e-02, 8.190e-02, -1.475e-02, -1.077e-01, 1.408e-02, -1.033e-02, 2.499e-02, -8.072e-02, -9.272e-02, -4.949e-02, -4.336e-01, 5.354e-01, 1.022e+00, -1.878e-01));
	r += mul(s2_8, M4(-3.080e-03, 6.252e-02, 4.382e-02, 1.736e-01, -1.568e-02, 1.294e-01, 1.588e-01, -3.209e-03, -5.668e-02, -2.989e-02, 2.673e-01, -1.652e-01, -1.984e-01, -2.244e-01, -1.289e-01, -2.206e-02));
	r += mul(s3_0, M4(-3.345e-02, -6.145e-02, 1.239e-01, 1.860e-01, 5.689e-01, 5.143e-02, -1.748e-01, -4.751e-01, -2.738e-01, -1.132e-01, -1.073e-01, -5.956e-02, 6.889e-02, 4.414e-02, -5.410e-02, 2.421e-02));
	r += mul(s3_1, M4(-4.755e-02, -3.658e-02, 1.941e-01, 1.070e-01, 6.188e-02, -4.438e-01, 6.193e-02, -8.983e-02, 8.250e-02, -3.456e-02, -3.287e-03, 1.277e-01, 8.643e-02, 3.769e-02, -1.689e-01, -5.630e-02));
	r += mul(s3_2, M4(4.335e-02, 1.358e-02, 5.347e-02, 1.566e-01, -4.897e-02, -1.998e-01, 2.272e-01, -7.817e-02, -1.532e-01, 3.866e-02, -4.835e-02, -8.465e-03, -1.622e-01, -8.045e-02, 8.395e-02, -1.099e-01));
	r += mul(s3_3, M4(-2.014e-01, 8.535e-02, -1.486e-01, 4.427e-01, 5.379e-01, -2.338e-02, -1.982e-01, -1.629e-01, -9.256e-03, 1.563e-01, -2.911e-02, -6.092e-02, -7.215e-02, 1.460e-01, -4.224e-03, 4.965e-02));
	r += mul(s3_4, M4(5.919e-02, 3.212e-01, -3.621e-02, -4.207e-01, 6.892e-02, 8.369e-02, 4.878e-02, -1.324e-01, -1.539e-01, 9.862e-03, -1.181e-01, 1.386e-01, 4.491e-02, 4.382e-02, 5.232e-02, 1.089e-01));
	r += mul(s3_5, M4(-4.122e-02, 1.981e-01, 3.009e-02, 1.198e-01, 6.533e-01, -1.429e-01, -4.168e-01, -8.503e-02, 3.888e-02, 3.679e-02, -1.368e-01, -3.706e-02, 4.718e-02, -1.036e-01, 6.045e-02, 2.219e-02));
	r += mul(s3_6, M4(-1.123e-01, -1.056e-01, -3.821e-02, -8.801e-02, -4.420e-02, 4.255e-03, -2.447e-01, 4.728e-03, 1.282e-01, 1.008e-01, 1.862e-01, -9.000e-03, -1.476e-02, -6.480e-02, 1.721e-02, 8.208e-02));
	r += mul(s3_7, M4(8.583e-02, -5.756e-02, -2.266e-01, -3.114e-01, -2.369e-01, 1.099e-01, -2.180e-01, -2.813e-01, -8.890e-02, -8.020e-02, 1.347e-01, -5.877e-02, 1.781e-01, 1.528e-02, 2.447e-02, -6.176e-02));
	r += mul(s3_8, M4(-5.824e-02, -2.396e-02, -1.056e-01, 2.120e-01, 7.797e-02, -7.151e-03, 9.115e-02, -1.450e-01, 7.011e-02, 2.717e-02, -1.055e-01, -8.077e-02, -1.130e-01, -1.042e-01, -6.020e-03, 5.198e-03));
	r += mul(s4_0, M4(5.166e-03, 1.304e-01, -4.617e-02, 2.465e-01, -1.138e-01, -8.594e-02, 2.374e-02, 2.387e-01, -1.221e-01, 3.200e-01, -4.661e-02, -2.573e-01, -1.490e-01, 8.570e-02, 1.173e-01, -8.304e-02));
	r += mul(s4_1, M4(-1.341e-01, -9.204e-02, 1.326e-01, -4.510e-02, 1.127e-01, -1.129e-01, -9.003e-02, 6.497e-02, -8.473e-02, 2.957e-01, -5.822e-02, 2.001e-01, -1.312e-01, 1.851e-01, 6.737e-02, 2.108e-01));
	r += mul(s4_2, M4(-1.810e-01, 5.466e-02, 1.819e-01, 1.517e-01, 7.883e-02, -2.586e-02, -1.670e-01, -7.121e-02, -2.213e-01, 2.562e-01, -1.028e-02, 1.537e-01, -2.064e-01, 1.528e-01, 2.646e-01, 8.116e-02));
	r += mul(s4_3, M4(1.251e-01, -1.643e-01, -1.100e-01, -1.026e-01, -1.469e-01, 1.956e-01, -1.132e-01, -2.920e-01, -1.660e-01, 4.833e-01, 7.354e-02, 3.729e-01, 2.297e-01, 1.096e-01, 1.608e-01, -9.050e-02));
	r += mul(s4_4, M4(-1.269e-01, -1.451e-01, -1.771e-01, -2.583e-02, 2.799e-01, -7.998e-02, -7.126e-02, -3.134e-01, -1.531e-01, 2.349e-01, -6.403e-02, 3.341e-01, 5.373e-01, 3.720e-01, 5.513e-02, -3.110e-02));
	r += mul(s4_5, M4(2.867e-01, -7.715e-02, -2.043e-02, -1.149e-01, -7.275e-02, 1.572e-01, 1.648e-02, -1.160e-03, -8.993e-02, -1.441e-02, -2.222e-01, 1.281e-01, 6.476e-02, 2.251e-01, -2.359e-01, -1.092e-01));
	r += mul(s4_6, M4(-1.415e-02, 1.372e-01, -1.024e-02, -1.359e-01, -1.174e-01, 1.206e-01, 3.089e-02, 9.655e-02, -7.835e-02, 2.261e-01, 8.681e-02, 3.623e-01, 1.141e-01, -1.457e-03, -1.088e-01, -1.918e-01));
	r += mul(s4_7, M4(-1.089e-01, -7.624e-02, -2.815e-02, -6.304e-02, -7.850e-02, 2.300e-01, 1.237e-02, -1.772e-01, -1.581e-01, 1.061e-01, 1.307e-02, 2.340e-02, -1.419e-01, -1.174e-01, -2.259e-01, -2.288e-01));
	r += mul(s4_8, M4(1.302e-01, -4.664e-03, -3.681e-02, 5.637e-02, -1.678e-01, 8.718e-02, -2.442e-02, -7.711e-02, 4.939e-02, 2.992e-01, 1.165e-01, -4.479e-02, -1.839e-01, 1.086e-01, -9.941e-02, -4.750e-02));
	r += mul(s5_0, M4(2.046e-01, 1.038e-01, -4.133e-01, -2.010e-01, 2.893e-02, -1.592e-01, 1.398e-01, -7.004e-02, 1.399e-01, -1.197e-01, 2.010e-02, -6.009e-02, 1.121e-01, -1.981e-01, -1.484e-01, -6.293e-02));
	r += mul(s5_1, M4(2.744e-02, -1.539e-01, -4.901e-01, -1.259e-01, 7.296e-03, 1.802e-03, -3.523e-02, 2.218e-02, -2.658e-02, -1.041e-01, 1.138e-02, 9.445e-02, 3.299e-02, -2.628e-01, -1.952e-01, -2.199e-01));
	r += mul(s5_2, M4(1.606e-02, 1.991e-01, 1.412e-01, 1.806e-03, -2.459e-01, -5.529e-02, -1.762e-01, 9.896e-02, 2.059e-01, -1.678e-01, 3.718e-02, 2.952e-02, 3.115e-01, -1.161e-01, -2.211e-01, -1.061e-01));
	r += mul(s5_3, M4(-3.352e-01, 1.077e-01, 2.678e-01, -4.436e-01, 1.087e-02, -1.374e-02, 2.687e-02, 6.731e-02, 5.617e-02, -9.116e-02, 1.330e-02, -9.804e-02, -1.134e-01, -2.236e-02, -1.948e-01, -7.179e-02));
	r += mul(s5_4, M4(2.647e-01, -2.525e-02, -2.016e-01, -1.311e-01, 3.206e-01, 2.642e-01, 9.656e-02, 5.895e-02, 5.513e-02, -2.987e-02, 8.391e-02, -1.445e-01, -2.961e-01, -5.127e-01, -1.732e-01, 3.098e-02));
	r += mul(s5_5, M4(-8.694e-02, 6.743e-01, 3.185e-01, 1.705e-01, -7.492e-02, 5.664e-02, -1.143e-01, 1.243e-01, 1.243e-01, -1.035e-01, 5.420e-02, -1.083e-01, -1.275e-01, -2.274e-01, -2.832e-01, -1.849e-02));
	r += mul(s5_6, M4(1.238e-01, 1.585e-01, 5.295e-01, 8.451e-02, 5.142e-02, -2.950e-02, 2.497e-02, 1.916e-01, -7.226e-02, -1.434e-01, 1.111e-01, -1.004e-01, 2.997e-02, -1.751e-02, -5.959e-02, -4.726e-02));
	r += mul(s5_7, M4(8.209e-01, 1.719e-01, 5.450e-01, -2.042e-02, 9.534e-02, 1.269e-01, 5.457e-02, -1.328e-02, -2.037e-01, 9.191e-02, -8.484e-02, 5.416e-03, 1.403e-02, -3.179e-02, 2.140e-01, -4.565e-02));
	r += mul(s5_8, M4(1.142e-01, 5.137e-01, 1.714e-01, 5.144e-01, -1.404e-02, -4.950e-02, 1.041e-01, 1.605e-01, -4.338e-02, 4.684e-02, -1.573e-02, -1.635e-02, 5.163e-02, -1.147e-01, 6.578e-02, 2.531e-01));
	r += mul(s6_0, M4(1.340e-01, -2.285e-01, -9.562e-02, 1.714e-01, 2.930e-01, 8.161e-02, -4.617e-02, -4.056e-02, -2.496e-03, 4.013e-03, -5.556e-02, 2.695e-02, 2.549e-01, -2.422e-01, -5.763e-03, -3.592e-01));
	r += mul(s6_1, M4(-2.073e-01, -2.901e-01, 2.882e-01, 1.224e-01, 2.254e-01, -6.127e-02, 1.563e-01, 1.424e-01, -1.351e-01, 1.496e-02, 2.001e-02, 2.628e-02, 8.940e-02, -3.824e-01, -2.008e-02, -2.165e-01));
	r += mul(s6_2, M4(1.411e-01, 7.237e-02, -7.323e-02, -6.832e-02, -2.638e-02, 2.173e-02, -5.183e-02, -2.793e-02, 1.588e-01, -3.605e-02, -1.937e-01, 2.666e-01, 3.727e-02, -1.306e-01, -1.939e-01, -2.043e-01));
	r += mul(s6_3, M4(-9.488e-02, -1.844e-01, 7.070e-02, -1.471e-01, -9.504e-03, -1.100e-01, 5.983e-02, -7.031e-03, 2.339e-01, 2.060e-01, -1.541e-01, -1.469e-01, -6.130e-02, -1.651e-01, 5.473e-03, -4.535e-01));
	r += mul(s6_4, M4(-9.408e-02, 1.051e-01, 5.935e-01, 2.440e-01, 4.207e-02, -1.485e-01, 2.241e-01, -1.543e-01, 4.195e-02, -1.073e-01, -3.652e-01, 7.859e-02, 2.443e-01, -3.602e-01, 7.919e-02, -3.963e-01));
	r += mul(s6_5, M4(-9.282e-02, -5.716e-03, 2.926e-01, 1.167e-02, 4.879e-02, 3.732e-02, -8.926e-02, 4.256e-02, -1.788e-01, 2.622e-01, 1.357e-01, 7.940e-02, 1.382e-01, 6.902e-02, -1.767e-01, -1.719e-01));
	r += mul(s6_6, M4(-1.696e-03, -1.124e-01, 1.223e-01, -2.895e-01, 1.031e-01, -7.886e-02, 3.023e-02, 7.795e-02, -4.661e-02, -9.979e-02, 6.993e-02, -7.016e-02, 4.502e-02, -7.555e-02, -6.096e-02, -3.440e-01));
	r += mul(s6_7, M4(-3.132e-02, -1.518e-01, 9.386e-02, 3.184e-02, -2.529e-02, 2.358e-01, -1.085e-01, 1.057e-01, 7.691e-03, -2.959e-01, 3.834e-03, 1.955e-01, 1.709e-01, -3.687e-02, 8.499e-02, 2.509e-02));
	r += mul(s6_8, M4(3.696e-02, 5.454e-02, -1.603e-01, 1.265e-01, -9.435e-02, 5.880e-02, -3.653e-02, 6.345e-02, -1.639e-02, -2.848e-01, -6.280e-02, 2.619e-01, 7.176e-02, -4.521e-02, 5.806e-02, 8.413e-02));
	r += mul(s7_0, M4(-8.326e-02, -6.503e-02, -2.698e-01, 1.909e-01, 2.131e-02, -2.712e-01, 7.258e-02, -5.686e-03, 2.315e-02, 1.176e-01, -3.460e-02, -2.184e-01, -1.923e-01, 4.375e-02, -3.431e-03, 1.264e-01));
	r += mul(s7_1, M4(1.767e-01, 1.411e-01, 5.236e-02, -2.954e-02, -5.819e-02, -1.108e-01, -1.118e-02, -9.685e-02, -2.081e-02, -7.350e-02, 5.443e-02, -1.465e-01, -1.765e-01, 1.862e-01, -3.695e-02, 2.506e-01));
	r += mul(s7_2, M4(9.326e-02, 6.618e-03, -7.719e-02, -1.290e-01, -3.512e-02, -8.315e-02, -3.536e-01, 8.163e-02, -8.239e-02, 1.151e-01, 7.672e-02, -5.276e-02, -2.285e-01, -2.168e-02, 1.083e-01, 1.704e-01));
	r += mul(s7_3, M4(2.332e-02, 1.445e-01, 1.241e-01, -6.039e-02, 8.266e-02, -1.892e-01, -5.408e-01, 8.206e-02, 2.250e-01, 2.425e-01, 6.192e-02, 5.134e-03, -5.053e-02, 2.632e-01, -7.094e-03, 9.883e-02));
	r += mul(s7_4, M4(-2.304e-02, -8.378e-02, -6.218e-02, -1.157e-01, -4.370e-02, 1.416e-01, 4.883e-01, 1.203e-01, -1.086e-01, 5.591e-02, -2.372e-01, 5.909e-02, -1.630e-01, 1.430e-01, 8.641e-02, -4.704e-03));
	r += mul(s7_5, M4(-1.545e-01, -6.740e-02, 1.799e-01, -4.104e-02, -3.570e-02, 2.898e-01, -1.500e-01, -2.319e-01, -1.425e-01, -4.409e-02, 2.314e-01, 2.013e-02, -1.138e-01, 1.329e-01, -1.275e-01, 9.538e-02));
	r += mul(s7_6, M4(6.026e-02, 2.461e-01, 6.967e-02, 1.197e-01, -7.590e-02, 7.220e-02, -6.694e-02, -6.113e-01, 6.629e-02, 8.935e-02, -1.509e-01, 1.458e-01, -3.097e-02, 8.744e-02, 5.177e-02, 1.485e-01));
	r += mul(s7_7, M4(-7.051e-02, -1.119e-01, -7.987e-02, 3.374e-02, -2.263e-01, 5.299e-02, -2.333e-01, -6.558e-01, 9.406e-02, -2.782e-01, 1.104e-01, -2.358e-01, -1.142e-01, -4.699e-02, -2.193e-01, -5.177e-02));
	r += mul(s7_8, M4(-1.533e-02, -4.159e-02, -9.005e-02, -2.094e-02, 4.571e-02, 8.586e-02, -7.003e-02, -1.314e-01, -2.340e-02, -3.771e-02, 9.190e-02, 1.260e-01, -1.660e-01, -9.283e-03, 8.861e-02, -9.932e-02));
	r += V4(-4.065e-02, 3.730e-03, 1.817e-03, 1.345e-02);
	return r;
}

// f2: weighted sum of 72 four-component inputs plus a constant offset.
//
// Parameters: sK_J for K = 0..7 and J = 0..8, each a V4 (min16float4 per the
// file's COMMON section). The function only reads them; nothing else is
// touched (no textures, no globals).
//
// Returns: r = sum over all (K, J) of mul(sK_J, W[K][J]) + bias, where each
// W[K][J] is the literal M4 (min16float4x4) on the matching line and bias is
// the literal V4 added last. mul(vector, matrix) treats the vector as a row
// vector, so every output component mixes all four input components.
//
// NOTE(review): the naming suggests K selects one of eight source feature
// groups and J one of nine neighbourhood taps (3x3) — confirm against the
// pass that calls f2; neither is visible from this function alone.
// NOTE(review): the constants look like generated (trained) weights; they
// should be regenerated rather than edited by hand — confirm with the
// generator. The accumulation order is kept as emitted because reordering
// reduced-precision additions can change rounding.
V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) {
	// Accumulator; the scalar 0.0 is broadcast to all four components.
	V4 r = 0.0;
	// Contributions of the s0_* inputs.
	r += mul(s0_0, M4(9.513e-02, -1.023e-01, 1.587e-01, 1.025e-01, -6.720e-02, 1.538e-01, -3.814e-02, 2.291e-02, 8.159e-02, -5.872e-02, 8.317e-02, 1.526e-01, 1.371e-01, -4.979e-04, -1.534e-01, -3.054e-02));
	r += mul(s0_1, M4(-5.019e-03, 1.215e-01, -2.858e-01, 1.509e-01, -7.670e-02, -1.181e-01, -3.646e-02, -5.105e-02, 1.079e-01, 8.831e-02, -1.282e-02, -1.592e-01, 2.819e-01, -1.678e-01, 1.269e-01, 2.218e-02));
	r += mul(s0_2, M4(-2.377e-01, -3.966e-02, -4.808e-02, -1.515e-03, 2.682e-02, -1.740e-02, 1.949e-01, -2.585e-01, 1.936e-01, -2.997e-02, 6.483e-02, -1.305e-01, 8.261e-02, -1.004e-01, 7.083e-03, -8.298e-02));
	r += mul(s0_3, M4(-1.870e-02, -2.180e-02, 1.287e-01, -1.681e-02, 5.307e-02, -1.779e-02, -3.764e-02, -6.278e-03, -6.895e-02, -1.381e-01, 8.012e-02, 1.061e-02, -1.052e-02, 6.920e-02, -9.246e-02, -1.925e-01));
	r += mul(s0_4, M4(1.152e-01, 3.707e-01, -3.938e-01, -2.672e-02, -2.630e-02, -3.670e-01, 9.251e-02, 1.047e-01, 3.332e-02, 3.702e-02, 3.343e-02, -4.699e-02, 1.887e-01, -3.178e-01, 4.346e-01, -1.446e-01));
	r += mul(s0_5, M4(-1.992e-01, 2.275e-01, -1.044e-01, -1.716e-01, 8.659e-03, -1.067e-01, -5.529e-02, 1.015e-01, -2.028e-01, -6.617e-03, 3.986e-02, -1.639e-01, 6.785e-01, 1.518e-01, 1.681e-01, -1.835e-01));
	r += mul(s0_6, M4(-1.659e-01, -8.147e-02, 1.015e-01, 5.627e-02, -6.553e-02, 1.071e-01, -1.059e-01, -1.409e-01, 1.422e-02, -9.457e-02, -1.223e-01, 9.262e-02, 2.481e-01, 3.273e-01, -4.202e-02, -1.980e-01));
	r += mul(s0_7, M4(1.837e-01, 1.838e-01, 4.286e-02, -2.344e-02, 1.487e-01, 3.209e-02, -3.188e-02, 8.071e-02, 9.691e-03, 8.573e-02, 2.254e-01, -5.335e-02, 6.388e-02, 7.047e-02, -1.156e-01, -1.170e-01));
	r += mul(s0_8, M4(-2.658e-01, 1.008e-01, 3.286e-02, 2.389e-03, 7.517e-02, -2.851e-02, 6.483e-02, 7.479e-02, -1.277e-01, 7.402e-02, -1.648e-01, 6.628e-02, 1.634e-01, 1.439e-01, -2.544e-01, 4.385e-02));
	// Contributions of the s1_* inputs.
	r += mul(s1_0, M4(-1.361e-02, -2.189e-02, -1.571e-01, 7.063e-02, 1.665e-01, 1.246e-01, 2.842e-01, -3.367e-02, -2.822e-01, -2.907e-01, -5.340e-01, -1.405e-01, -3.345e-02, -7.615e-02, 2.898e-02, 5.967e-02));
	r += mul(s1_1, M4(-4.387e-02, -4.236e-01, 4.281e-01, -1.953e-01, 7.487e-01, 7.529e-01, -5.215e-01, 1.123e-01, -1.363e-01, 1.408e-02, -1.552e-01, 3.233e-01, -2.761e-01, 8.336e-02, -7.412e-02, -9.009e-02));
	r += mul(s1_2, M4(-4.157e-02, -3.318e-02, 2.718e-01, 1.120e-01, 3.292e-01, 1.252e-01, -2.324e-01, -5.152e-03, -3.227e-01, -2.902e-02, -4.069e-01, 2.859e-01, 2.375e-01, -4.981e-02, -1.607e-01, -1.368e-02));
	r += mul(s1_3, M4(2.961e-02, -3.944e-01, 6.968e-02, 6.573e-02, 2.166e-02, 1.155e-01, 1.526e-01, 1.394e-01, 1.478e-01, 7.622e-02, 5.468e-02, 4.773e-02, -1.586e-02, -1.204e-02, 5.099e-02, 1.672e-01));
	r += mul(s1_4, M4(-4.088e-01, -1.786e-02, 3.435e-01, 1.761e-01, 4.563e-01, 7.573e-01, -3.591e-01, -1.546e-01, -2.650e-01, 8.012e-02, -3.056e-01, 1.255e-01, -1.513e-01, -2.147e-01, -8.252e-02, 2.772e-02));
	r += mul(s1_5, M4(-1.657e-01, -1.217e-01, 3.345e-01, -2.820e-01, 3.113e-03, 4.717e-01, 4.105e-02, 2.253e-01, 2.879e-01, 1.124e-01, 6.953e-02, 5.647e-01, -2.668e-01, -3.415e-01, -1.983e-02, 1.841e-01));
	r += mul(s1_6, M4(5.096e-02, -3.405e-02, 2.642e-01, -1.489e-01, -1.688e-02, -1.223e-01, 2.398e-01, 9.426e-02, -2.125e-01, -1.438e-01, 5.223e-02, -2.062e-01, -2.646e-01, 1.598e-01, 4.466e-02, 2.793e-01));
	r += mul(s1_7, M4(-1.469e-01, -2.421e-01, 3.448e-01, -4.945e-02, 2.766e-01, 1.970e-01, -2.203e-01, -6.525e-02, 2.432e-02, 4.168e-01, 4.742e-01, 2.581e-01, -1.645e-01, -4.138e-01, -1.509e-01, 1.089e-01));
	r += mul(s1_8, M4(2.470e-01, -1.231e-01, -1.178e-01, -6.888e-02, -5.987e-02, -3.821e-02, -1.473e-01, 1.678e-01, 5.462e-01, 9.235e-02, -7.806e-01, 3.665e-01, -1.915e-01, -1.793e-01, -9.810e-02, 2.176e-01));
	// Contributions of the s2_* inputs.
	r += mul(s2_0, M4(2.157e-01, 2.611e-02, 2.140e-02, 7.000e-02, 1.860e-01, -1.012e-02, 5.828e-02, 1.636e-01, 1.133e-02, 5.581e-02, -1.959e-01, 9.071e-03, -3.571e-01, 1.393e-01, -2.630e-01, -4.527e-01));
	r += mul(s2_1, M4(-2.209e-02, -8.153e-02, -2.016e-01, -2.546e-02, -1.248e-01, 3.990e-02, -2.968e-02, -1.432e-01, -7.041e-02, -2.290e-01, -1.667e-01, 2.402e-01, -3.291e-01, -3.824e-02, 4.528e-01, -2.175e-01));
	r += mul(s2_2, M4(4.376e-01, 3.842e-02, 4.605e-02, 3.200e-02, 2.267e-01, -1.208e-02, -2.008e-01, -1.760e-01, -1.106e-01, -7.952e-02, -2.439e-02, -2.233e-02, 2.287e-01, 7.038e-02, -8.782e-02, 2.208e-01));
	r += mul(s2_3, M4(-2.928e-01, -1.633e-01, 2.021e-01, -1.321e-01, -1.699e-01, -1.313e-01, 2.450e-01, 9.154e-02, -1.370e-01, -1.742e-01, 3.649e-01, 1.948e-02, -1.845e-01, 5.658e-01, -3.862e-01, -5.922e-01));
	r += mul(s2_4, M4(-1.180e-01, 5.187e-02, -1.602e-01, 1.942e-01, -6.294e-02, 6.854e-02, -4.731e-02, -1.045e-01, 4.786e-01, 1.082e-01, -2.489e-01, -3.709e-01, 1.893e-01, -8.286e-02, 1.363e-01, -5.499e-01));
	r += mul(s2_5, M4(3.056e-02, 8.957e-02, -1.323e-01, -4.396e-02, 1.382e-01, 4.930e-02, 1.285e-01, -1.072e-01, -8.383e-02, 7.365e-02, -3.295e-02, -4.595e-02, 3.012e-01, -3.066e-01, 3.639e-01, 6.333e-03));
	r += mul(s2_6, M4(1.122e-01, -1.318e-01, 2.506e-02, 7.950e-02, -1.492e-01, -1.961e-02, 1.083e-01, 6.986e-02, -6.250e-02, 1.755e-01, 1.868e-01, -1.286e-01, -4.907e-01, 1.165e-01, 2.817e-01, -7.604e-01));
	r += mul(s2_7, M4(5.926e-02, -1.446e-02, 6.740e-02, -1.877e-01, 1.512e-01, 4.305e-02, -4.368e-02, -3.655e-02, 2.004e-01, 8.357e-02, -4.056e-01, -2.699e-01, 6.772e-02, -5.004e-01, 7.243e-01, -6.036e-01));
	r += mul(s2_8, M4(-1.636e-01, 3.726e-02, -7.612e-02, -3.775e-02, -3.209e-02, -6.485e-02, -1.036e-01, 3.863e-02, -2.952e-01, -1.468e-01, 5.695e-01, -3.171e-01, 2.957e-01, -2.874e-01, -7.122e-02, 2.408e-01));
	// Contributions of the s3_* inputs.
	r += mul(s3_0, M4(-2.847e-01, -6.624e-02, 3.070e-01, 2.705e-01, -4.489e-01, -2.211e-02, 6.367e-02, -1.580e-01, -3.082e-02, 1.637e-02, -5.281e-02, -2.793e-01, 1.842e-03, 1.121e-01, -1.861e-01, -2.153e-02));
	r += mul(s3_1, M4(-1.506e-01, -8.362e-03, 2.448e-01, 2.311e-01, -1.994e-04, 1.714e-01, -5.810e-02, -3.037e-01, -1.406e-01, 1.158e-02, -9.110e-02, -1.218e-03, 7.478e-02, -2.950e-03, 1.240e-02, 4.613e-02));
	r += mul(s3_2, M4(-1.082e-01, 2.173e-01, 1.511e-01, -3.761e-01, -1.362e-01, -7.930e-02, -3.676e-01, 3.172e-02, -1.701e-01, -8.242e-02, 7.806e-03, -9.085e-02, 8.387e-02, -4.756e-02, 1.034e-01, 2.831e-02));
	r += mul(s3_3, M4(1.168e-01, -1.872e-01, 2.581e-02, 1.804e-01, 3.309e-01, 3.506e-02, -1.452e-01, -5.199e-01, 8.211e-02, 1.174e-02, -3.079e-02, -7.860e-03, 2.571e-02, 2.033e-01, -2.844e-03, -5.911e-03));
	r += mul(s3_4, M4(5.956e-01, 3.077e-01, -2.823e-01, 4.851e-01, 2.023e-01, 8.281e-02, -1.973e-02, 2.091e-01, 7.000e-02, 2.272e-01, 7.937e-02, 1.465e-02, -3.652e-02, -8.854e-02, 5.284e-02, 2.072e-02));
	r += mul(s3_5, M4(2.745e-01, 1.015e-01, 1.909e-01, 1.966e-02, 8.355e-02, 1.783e-01, 1.539e-01, 2.331e-01, -1.056e-01, -5.163e-02, 2.818e-03, 2.078e-02, -6.584e-02, 1.102e-02, 6.753e-02, 3.359e-03));
	r += mul(s3_6, M4(1.170e-01, 2.291e-01, 1.342e-01, -1.054e-01, -3.271e-01, 3.134e-01, 2.687e-01, 3.008e-02, -9.659e-02, -2.473e-02, -3.813e-03, 5.018e-03, 5.568e-02, 9.386e-03, -1.370e-01, -1.200e-01));
	r += mul(s3_7, M4(2.118e-01, -1.038e-01, 4.191e-01, 2.680e-01, -1.326e-01, -2.662e-01, -2.487e-01, -1.841e-01, 4.636e-02, 3.607e-02, -1.344e-01, 1.520e-01, -5.490e-02, -5.336e-02, -3.617e-02, 2.010e-03));
	r += mul(s3_8, M4(7.668e-03, -3.285e-03, 1.146e-01, -5.370e-03, -2.923e-01, 2.723e-02, -1.948e-01, 7.522e-02, 7.848e-02, -1.212e-01, 2.810e-01, 8.272e-02, -3.596e-02, -1.255e-01, 7.025e-02, 6.561e-02));
	// Contributions of the s4_* inputs.
	r += mul(s4_0, M4(-5.750e-02, -2.343e-03, 1.155e-02, 1.263e-01, 2.045e-01, -1.364e-01, 4.942e-02, 2.860e-02, -2.458e-01, -1.640e-01, 2.016e-01, 6.267e-02, 9.234e-02, -6.066e-02, 3.070e-02, 6.094e-02));
	r += mul(s4_1, M4(-3.113e-02, 7.467e-02, 2.200e-02, -6.583e-02, 2.668e-01, -7.219e-02, 1.316e-01, -9.913e-02, -4.762e-02, 4.021e-02, -2.226e-01, -1.450e-01, 9.304e-02, -8.959e-02, 1.760e-01, 2.760e-02));
	r += mul(s4_2, M4(2.147e-01, -8.705e-03, 4.760e-03, -2.295e-01, 9.373e-02, -8.850e-02, -3.576e-02, 2.126e-01, -1.836e-01, -1.994e-02, 2.873e-02, -2.957e-03, 1.928e-01, 4.375e-02, 6.175e-02, -5.182e-02));
	r += mul(s4_3, M4(-5.723e-02, -1.145e-01, 2.409e-01, 1.399e-01, 3.128e-02, 4.703e-03, -1.700e-01, -5.630e-02, -7.253e-02, -1.281e-01, -4.285e-03, 4.734e-02, -8.377e-02, -3.062e-01, 2.587e-01, 1.099e-01));
	r += mul(s4_4, M4(-7.614e-02, -9.925e-03, -8.455e-03, -4.035e-02, 2.741e-01, 7.492e-02, 6.086e-01, 1.481e-01, -1.592e-02, -7.169e-02, 1.228e-01, 1.198e-01, -1.680e-01, -2.289e-01, 4.790e-01, 3.902e-02));
	r += mul(s4_5, M4(-1.123e-01, 8.752e-02, 1.686e-02, -1.039e-01, -1.253e-01, -1.933e-02, 1.212e-03, 1.023e-01, 7.552e-02, -9.984e-02, 3.426e-02, 6.780e-02, 2.702e-02, -6.068e-02, 1.967e-01, 8.629e-02));
	r += mul(s4_6, M4(5.797e-02, -1.353e-01, 1.033e-01, -1.719e-01, -1.335e-01, 6.762e-02, 7.013e-02, -7.472e-02, 3.723e-02, -3.790e-02, 9.515e-02, 8.516e-02, -7.411e-02, -2.409e-01, -2.353e-02, 2.940e-01));
	r += mul(s4_7, M4(2.548e-02, 2.544e-03, -1.350e-02, -2.464e-01, -2.018e-01, -1.882e-02, -2.318e-01, -1.809e-01, -1.304e-02, 1.333e-01, 9.827e-02, 9.163e-02, -4.668e-02, -4.071e-02, -8.042e-03, -2.185e-03));
	r += mul(s4_8, M4(3.587e-02, 1.501e-01, -5.163e-01, 1.042e-01, -6.538e-02, -1.005e-01, -4.472e-02, 4.179e-02, 1.050e-01, -4.198e-02, 2.039e-01, 2.244e-02, -1.004e-01, -1.028e-01, 2.824e-02, 8.763e-02));
	// Contributions of the s5_* inputs.
	r += mul(s5_0, M4(7.821e-02, 7.323e-02, -3.199e-02, -1.595e-01, 3.097e-01, -3.831e-02, 1.785e-02, -2.153e-01, 1.812e-01, -4.676e-02, 1.000e-01, 2.932e-02, -9.241e-02, 9.792e-02, 7.440e-02, -3.885e-02));
	r += mul(s5_1, M4(-3.028e-01, -3.757e-02, -5.032e-02, 2.849e-01, -2.772e-01, 8.332e-02, -2.052e-01, 5.729e-02, 6.911e-02, -7.431e-02, 1.082e-01, 1.315e-01, 9.834e-02, 3.055e-02, -1.315e-01, 5.726e-03));
	r += mul(s5_2, M4(-1.636e-01, 1.645e-01, -4.554e-02, -2.774e-01, -2.691e-02, -5.220e-02, 9.571e-02, -3.884e-02, 4.966e-02, 2.502e-02, -9.109e-02, -2.851e-02, 6.685e-02, 3.636e-02, 5.882e-02, -1.581e-01));
	r += mul(s5_3, M4(-1.163e-01, 2.638e-02, -1.828e-01, -2.552e-01, 3.410e-03, -6.034e-03, 4.494e-02, 1.072e-01, 2.371e-01, -2.228e-01, -1.056e-02, -1.276e-02, 3.036e-02, 2.416e-01, -3.011e-01, -2.545e-01));
	r += mul(s5_4, M4(1.485e-01, 2.530e-01, -2.858e-01, 4.832e-01, -5.213e-02, -2.858e-01, -1.698e-01, -1.453e-01, -1.771e-01, 1.007e-01, -1.706e-01, -1.012e-01, 3.282e-02, 5.874e-02, -5.467e-01, 3.825e-02));
	r += mul(s5_5, M4(-3.837e-01, 3.641e-03, 5.814e-01, 4.933e-01, 3.131e-02, 8.307e-02, -3.989e-02, 5.503e-02, 6.367e-02, -4.050e-02, 7.006e-02, 2.848e-02, -4.190e-02, 1.647e-01, -9.264e-02, -3.662e-02));
	r += mul(s5_6, M4(-9.389e-02, -2.259e-01, 1.881e-01, -7.548e-02, 3.967e-02, 8.459e-02, 1.937e-01, 2.617e-02, -1.179e-01, -1.718e-01, 1.474e-01, 1.257e-01, 3.605e-02, -2.829e-02, 2.272e-02, -3.789e-01));
	r += mul(s5_7, M4(-2.656e-02, -1.844e-01, 3.402e-01, 4.901e-01, -5.836e-02, 3.312e-02, -2.004e-01, -4.783e-02, -2.746e-01, 1.515e-01, 3.878e-02, -7.158e-02, -1.820e-02, 3.105e-01, -1.832e-02, -1.099e-03));
	r += mul(s5_8, M4(4.523e-01, 1.502e-01, -2.123e-01, 2.320e-01, 2.085e-01, 5.715e-02, 3.191e-02, 7.901e-02, -8.453e-02, 5.544e-02, 4.279e-03, -1.328e-01, -9.691e-02, -2.488e-02, -4.719e-02, -1.008e-01));
	// Contributions of the s6_* inputs.
	r += mul(s6_0, M4(4.345e-01, -8.277e-02, -7.012e-02, 8.223e-02, -1.071e-01, 1.610e-01, 1.502e-01, 4.634e-02, -1.456e-01, 2.283e-02, 1.598e-01, 9.160e-02, 1.612e-02, -2.086e-01, -7.139e-02, -2.495e-01));
	r += mul(s6_1, M4(3.248e-01, -1.401e-01, -2.566e-02, 1.442e-01, 1.050e-01, -5.083e-02, 1.393e-01, 3.161e-02, -1.916e-01, 2.434e-02, 2.304e-01, -1.068e-02, -1.549e-01, 2.850e-01, -3.977e-02, 1.540e-01));
	r += mul(s6_2, M4(1.457e-02, 2.612e-02, -2.899e-02, -2.297e-01, 4.891e-02, -6.237e-02, -3.175e-02, 2.566e-02, 1.602e-01, 1.467e-02, -8.463e-02, -1.173e-01, 3.783e-02, -3.369e-03, -5.365e-02, -1.653e-03));
	r += mul(s6_3, M4(-1.816e-01, -1.660e-02, 1.077e-01, -3.667e-01, 2.299e-01, 2.353e-01, -3.491e-01, -3.060e-02, 1.803e-01, -9.714e-02, 7.105e-02, 9.895e-02, 1.764e-01, 3.558e-02, 9.593e-02, 8.519e-02));
	r += mul(s6_4, M4(1.539e-01, 1.200e-01, -1.353e-01, 1.529e-01, -9.333e-02, -2.507e-01, 3.316e-01, -1.001e-01, 1.263e-01, 3.118e-02, -1.714e-01, 4.910e-01, 1.587e-01, -5.905e-02, -1.439e-01, -1.274e-01));
	r += mul(s6_5, M4(-1.838e-01, -2.972e-02, -9.090e-02, 1.057e-01, 1.846e-01, -7.972e-02, -9.290e-02, 1.015e-01, 2.065e-01, 4.804e-02, -1.697e-01, -3.964e-01, 5.007e-02, 5.312e-02, -3.128e-02, 9.884e-02));
	r += mul(s6_6, M4(-3.086e-02, -6.920e-02, -9.317e-02, 4.002e-02, -2.568e-01, 2.163e-01, -7.020e-02, -5.020e-02, 1.172e-01, -1.631e-01, -2.243e-01, -1.067e-01, 3.532e-02, -3.497e-01, 1.851e-01, -1.516e-01));
	r += mul(s6_7, M4(2.297e-01, 2.157e-01, 7.460e-02, -8.860e-02, 8.448e-02, -3.654e-02, -2.066e-01, 1.232e-01, -2.739e-01, 2.449e-01, -2.853e-01, -1.242e-01, -1.292e-01, 8.422e-02, -2.224e-01, -1.715e-02));
	r += mul(s6_8, M4(6.990e-02, 7.739e-02, 2.214e-01, 6.205e-03, 2.966e-02, -2.918e-02, -3.100e-01, -3.494e-02, -2.535e-01, -6.415e-02, -3.804e-02, 7.443e-02, 1.881e-01, -4.946e-02, -1.682e-02, 4.035e-02));
	// Contributions of the s7_* inputs.
	r += mul(s7_0, M4(-3.276e-02, 5.851e-02, -7.947e-02, 1.102e-01, 1.314e-01, -2.101e-02, 4.667e-03, 1.552e-01, 1.391e-01, 9.108e-02, -5.034e-02, -1.252e-01, 1.662e-01, -2.567e-01, 2.469e-02, -2.212e-01));
	r += mul(s7_1, M4(-6.053e-02, -5.261e-02, -6.294e-02, -2.328e-01, 3.425e-01, 1.057e-01, 7.203e-02, -3.723e-01, 2.148e-02, 3.648e-02, -1.026e-01, 4.757e-02, 5.726e-02, 5.349e-02, 1.339e-01, -1.629e-01));
	r += mul(s7_2, M4(-3.005e-03, 8.149e-02, 1.175e-01, -6.981e-02, 7.763e-02, -7.641e-02, 2.218e-01, -1.141e-01, 7.203e-02, -5.508e-02, -8.120e-02, 8.905e-02, 1.130e-01, -1.605e-01, 1.333e-01, -4.009e-02));
	r += mul(s7_3, M4(8.709e-02, -1.158e-02, 6.767e-03, 4.893e-02, 3.468e-01, -1.774e-01, 2.457e-01, -2.309e-01, -2.759e-01, -1.656e-01, 5.379e-03, 5.537e-02, -2.075e-01, -3.874e-01, 9.547e-02, 1.176e-01));
	r += mul(s7_4, M4(1.664e-01, -6.784e-03, 1.283e-01, -5.551e-02, 5.972e-01, -1.779e-01, 7.139e-01, -3.232e-02, -1.695e-01, 1.692e-01, -1.356e-01, 5.937e-02, -1.303e-01, -5.354e-02, 1.923e-01, -6.313e-03));
	r += mul(s7_5, M4(1.809e-01, -5.603e-02, -1.256e-01, 1.202e-01, 2.529e-01, 4.616e-02, -5.929e-02, 5.680e-02, -2.929e-01, 7.123e-02, 1.358e-01, -3.910e-02, -2.787e-01, 2.232e-02, 5.208e-02, -1.017e-01));
	r += mul(s7_6, M4(-2.591e-01, -6.096e-02, 8.695e-02, 1.089e-01, 1.466e-01, -6.621e-01, 2.940e-01, 1.774e-01, 2.114e-01, 4.313e-02, 1.019e-01, -3.813e-02, 1.112e-02, -1.973e-01, 1.290e-01, -1.048e-02));
	r += mul(s7_7, M4(3.255e-02, -2.408e-02, -1.358e-01, -8.922e-02, 2.999e-01, -8.591e-02, -1.777e-01, 2.427e-01, 1.476e-01, -1.198e-01, 1.215e-01, -7.049e-02, -1.513e-01, 2.305e-02, 1.597e-01, 3.870e-02));
	r += mul(s7_8, M4(-1.616e-01, 6.090e-02, 7.322e-02, 3.689e-02, 1.026e-02, -5.670e-02, 5.745e-02, 1.727e-01, 9.462e-02, 6.318e-02, 1.639e-01, -1.412e-02, -2.783e-01, -1.519e-01, -1.171e-01, 1.104e-03));
	// Constant per-component offset, added once after all 72 products.
	r += V4(-8.533e-04, -5.599e-03, -1.843e-02, 5.456e-02);
	return r;
}

V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) {
	V4 r = 0.0;
	r += mul(s0_0, M4(-2.560e-02, -7.087e-03, -1.410e-01, 9.884e-02, -3.941e-02, 4.886e-03, 9.250e-02, -1.153e-01, 1.099e-01, 1.787e-02, 7.890e-02, 7.949e-02, 4.733e-02, 4.525e-02, -1.421e-01, 1.549e-01));
	r += mul(s0_1, M4(-1.759e-02, 3.651e-02, -2.068e-01, -6.240e-02, -1.894e-01, -6.618e-02, -5.977e-02, 1.928e-01, 2.456e-02, -4.077e-02, 1.906e-02, 7.152e-02, 2.549e-01, 1.165e-01, 2.430e-01, 1.737e-01));
	r += mul(s0_2, M4(1.805e-03, -2.171e-03, -1.793e-01, -2.124e-01, 1.723e-01, 2.540e-02, -5.076e-03, 8.146e-02, 5.550e-03, 1.639e-02, -1.948e-01, 3.635e-03, -4.027e-02, -3.399e-02, 2.736e-01, 6.094e-02));
	r += mul(s0_3, M4(-2.643e-02, -3.043e-02, 1.180e-01, -8.517e-02, 2.385e-02, 4.729e-02, 3.137e-02, 9.062e-02, -1.664e-01, -3.938e-02, -2.115e-02, -4.653e-03, 2.821e-01, 9.189e-02, -2.044e-02, -2.310e-02));
	r += mul(s0_4, M4(5.197e-01, 1.222e-01, 6.437e-02, 1.062e-01, -3.377e-01, 4.647e-02, -9.965e-02, -7.055e-02, 1.566e-02, 5.988e-02, 2.100e-02, -5.248e-02, -3.215e-01, -1.829e-01, -1.951e-01, -8.805e-02));
	r += mul(s0_5, M4(-1.388e-01, 6.670e-02, 2.007e-01, -8.099e-02, 8.637e-02, -5.086e-02, 3.154e-02, -3.167e-03, 6.398e-02, -5.311e-03, 1.620e-01, -7.674e-02, -3.382e-01, -1.289e-01, 5.176e-01, -1.229e-01));
	r += mul(s0_6, M4(5.681e-02, -2.286e-02, 3.962e-02, -4.412e-02, -1.683e-01, 3.669e-02, 1.467e-01, -2.707e-03, -1.067e-01, -3.951e-03, -4.406e-02, 5.629e-02, -3.607e-02, -1.522e-02, -1.369e-01, -5.543e-02));
	r += mul(s0_7, M4(2.579e-01, 1.184e-01, 8.497e-02, 1.449e-01, -5.598e-02, 3.054e-02, -8.097e-02, -1.113e-01, 2.554e-01, -5.213e-02, -3.915e-02, -6.455e-02, -3.335e-01, 1.170e-01, -9.548e-02, 2.320e-02));
	r += mul(s0_8, M4(-2.914e-01, -1.802e-01, 2.361e-02, -7.381e-02, 1.470e-01, 5.039e-02, -7.519e-02, -4.570e-03, -1.217e-01, 1.500e-02, 6.749e-02, 7.562e-02, -3.426e-01, -1.080e-01, 6.514e-02, 6.881e-02));
	r += mul(s1_0, M4(1.618e-01, -3.931e-02, -2.383e-02, 1.431e-01, 5.090e-02, -1.330e-02, -1.028e-01, 6.260e-02, -2.058e-01, 7.500e-03, 1.932e-01, -7.916e-02, -1.371e-01, -8.696e-02, -4.685e-02, -2.273e-02));
	r += mul(s1_1, M4(-3.040e-01, -5.094e-02, 9.008e-02, -1.285e-01, 7.680e-01, -9.636e-02, -2.345e-02, -2.210e-01, 4.076e-02, 1.152e-01, 3.239e-01, -7.640e-01, 1.565e-02, -5.839e-02, 2.723e-01, 2.319e-01));
	r += mul(s1_2, M4(4.965e-03, 1.240e-01, 2.817e-01, 1.224e-01, 7.815e-02, 4.454e-02, -7.984e-03, -2.240e-01, -3.825e-01, 7.662e-02, -1.033e-01, -2.338e-01, 2.427e-02, 1.267e-01, 1.043e-01, 3.834e-01));
	r += mul(s1_3, M4(-8.741e-02, -2.785e-02, 1.292e-01, -9.184e-02, 7.198e-02, -5.808e-02, -1.383e-01, 4.611e-02, 8.833e-02, -2.850e-02, -1.079e-01, -6.543e-02, 2.152e-02, -8.040e-02, 4.714e-02, -1.107e-01));
	r += mul(s1_4, M4(-1.832e-01, 4.999e-02, 2.310e-01, 2.137e-01, 2.118e-01, -4.953e-02, -7.942e-04, -2.041e-02, 9.558e-02, -1.278e-02, -2.033e-01, 1.049e-01, -1.479e-01, -2.008e-01, -5.131e-02, -2.963e-01));
	r += mul(s1_5, M4(-6.279e-02, 2.429e-01, 4.757e-03, -1.206e-01, -1.910e-01, -1.530e-01, 6.063e-01, -2.034e-01, 1.171e-01, 6.146e-02, -3.540e-02, 2.388e-01, 4.859e-01, 1.376e-01, -6.543e-01, 1.784e-01));
	r += mul(s1_6, M4(-1.344e-01, 1.360e-02, 9.947e-02, 1.474e-03, 1.993e-02, 3.195e-03, 4.908e-02, -3.237e-02, 1.468e-01, 8.365e-02, 1.188e-01, -1.473e-01, -4.335e-02, 2.327e-02, -5.855e-02, -1.053e-01));
	r += mul(s1_7, M4(-4.558e-03, 1.134e-02, -5.775e-02, 8.397e-02, 1.323e-01, 3.408e-02, 1.067e-01, 1.505e-02, -1.862e-01, -2.054e-01, 1.248e-01, 1.360e-01, -1.162e-01, 5.213e-02, 5.240e-03, 8.693e-02));
	r += mul(s1_8, M4(1.518e-01, 2.164e-01, 8.775e-02, -5.471e-02, -8.322e-03, 8.992e-02, -5.009e-03, -9.936e-02, -8.858e-02, -1.484e-01, 1.137e-01, -4.466e-01, 3.340e-01, 6.147e-02, -1.144e-01, -8.945e-02));
	r += mul(s2_0, M4(-4.849e-02, -1.255e-02, -1.621e-01, -7.859e-02, -2.514e-01, -1.148e-02, -9.760e-02, -1.137e-01, 2.550e-01, 1.699e-02, -8.587e-02, 7.128e-02, 7.682e-02, -7.427e-02, -8.652e-02, 3.613e-01));
	r += mul(s2_1, M4(2.779e-02, 7.570e-03, -1.344e-01, 1.296e-02, -1.083e-02, 3.117e-02, -1.328e-01, -3.693e-02, 2.474e-01, 1.763e-01, 1.408e-02, 1.362e-01, -2.391e-02, -2.802e-01, -4.279e-02, 3.188e-01));
	r += mul(s2_2, M4(-1.033e-01, -3.043e-02, -7.807e-02, -5.715e-02, 1.855e-01, -1.128e-01, 1.395e-02, -9.351e-02, -2.720e-01, 2.711e-01, -4.261e-02, 6.892e-02, 1.308e-01, -1.577e-01, 1.907e-01, 4.823e-01));
	r += mul(s2_3, M4(-1.561e-01, -2.317e-02, 2.741e-02, -1.019e-01, -2.797e-02, 7.486e-02, -6.564e-02, 2.268e-02, 2.078e-01, -2.920e-01, -1.632e-01, -5.214e-02, -2.783e-01, -3.381e-01, 2.476e-01, -4.718e-01));
	r += mul(s2_4, M4(2.369e-01, 3.477e-02, 2.674e-01, 2.764e-01, 1.127e-02, 2.274e-02, 5.289e-02, 1.770e-01, -1.297e-03, 1.805e-02, 1.734e-01, 3.586e-03, 1.278e-01, -6.466e-01, -5.282e-03, -3.198e-01));
	r += mul(s2_5, M4(3.002e-02, 1.447e-01, 2.242e-01, 3.591e-02, 1.510e-02, 5.069e-02, 1.510e-01, 1.019e-01, 9.118e-02, -4.876e-02, -1.372e-01, 2.119e-01, 5.599e-01, -2.415e-01, -2.045e-01, -5.714e-01));
	r += mul(s2_6, M4(-6.998e-02, -3.651e-02, 2.052e-03, -1.139e-01, 5.134e-02, -6.756e-02, -6.024e-02, -1.392e-01, 1.954e-01, 9.353e-02, 1.594e-02, -2.237e-01, -1.781e-01, -1.366e-01, -2.123e-01, 2.311e-02));
	r += mul(s2_7, M4(1.693e-01, -9.344e-02, 1.462e-01, 1.420e-01, -8.540e-02, 4.959e-02, -4.992e-02, 1.013e-01, -5.185e-01, 9.739e-02, -5.330e-02, -1.208e-01, -1.194e-01, -2.685e-01, -2.982e-01, -1.289e-01));
	r += mul(s2_8, M4(1.141e-01, -5.735e-02, 8.146e-02, -2.221e-02, 9.828e-02, -5.525e-02, 6.784e-02, -9.348e-02, -1.546e-01, 3.030e-01, -2.793e-02, 1.449e-02, -3.587e-01, -1.617e-01, 1.805e-01, 2.176e-01));
	r += mul(s3_0, M4(-8.667e-03, -5.350e-02, -3.774e-02, 7.121e-02, 1.094e-01, 2.259e-02, 5.645e-01, 4.007e-02, 6.136e-03, 2.721e-02, 1.573e-02, -1.529e-02, 3.419e-02, -1.128e-01, 1.004e-01, 6.939e-02));
	r += mul(s3_1, M4(3.826e-02, -2.135e-02, -3.854e-01, -4.123e-01, 1.643e-01, 8.967e-02, 7.549e-02, -3.133e-01, -8.262e-02, -2.612e-02, 1.106e-01, 1.795e-01, -7.740e-02, -3.704e-02, 2.029e-01, 1.461e-02));
	r += mul(s3_2, M4(-6.023e-02, 6.654e-02, -1.361e-01, -1.431e-01, 7.496e-02, 1.351e-01, 1.292e-01, 1.911e-01, -6.692e-02, -3.242e-02, 3.928e-02, -8.824e-03, -1.226e-03, -4.044e-02, 1.344e-01, 1.214e-01));
	r += mul(s3_3, M4(-3.305e-02, -1.101e-01, -1.894e-01, -3.469e-01, -1.210e-01, -3.212e-02, 2.904e-01, -2.047e-01, -1.211e-03, -7.072e-02, -1.544e-02, -5.603e-02, 1.195e-01, -2.336e-01, -5.574e-02, -3.214e-02));
	r += mul(s3_4, M4(-2.567e-01, 1.577e-01, -2.711e-01, -1.814e-01, 2.629e-01, -1.322e-01, 4.110e-01, -2.530e-01, 1.157e-01, -6.624e-02, -6.523e-02, 9.403e-02, -3.037e-02, -1.903e-01, -4.489e-02, -6.323e-02));
	r += mul(s3_5, M4(3.183e-01, 1.222e-01, -1.796e-01, -6.899e-01, -3.581e-02, 7.077e-02, 1.821e-01, -2.122e-01, -3.829e-03, 3.924e-02, 4.984e-02, 3.319e-03, -1.210e-02, -1.916e-01, -6.451e-02, -5.390e-02));
	r += mul(s3_6, M4(9.451e-02, 1.416e-01, 2.363e-01, -2.431e-03, -4.550e-01, 9.670e-02, 4.511e-01, -2.440e-01, -1.039e-02, -7.438e-03, -5.394e-02, -1.583e-01, 5.988e-02, -1.223e-01, -6.949e-02, -7.947e-02));
	r += mul(s3_7, M4(-4.528e-01, 1.578e-01, -2.586e-01, 9.291e-02, -4.691e-02, 1.333e-01, 7.600e-02, -6.177e-03, 1.024e-02, -2.282e-02, -7.689e-02, -6.901e-02, -6.665e-02, -4.332e-02, -1.728e-01, -6.849e-02));
	r += mul(s3_8, M4(1.362e-01, 2.879e-02, -1.518e-01, -8.947e-02, -9.332e-02, 1.320e-01, 1.138e-01, -1.209e-01, -4.019e-02, 1.469e-01, 1.528e-03, -1.214e-01, -5.435e-02, -1.586e-01, -2.739e-02, 6.551e-02));
	r += mul(s4_0, M4(-1.033e-01, -6.952e-02, -1.560e-02, 1.402e-02, -3.370e-02, -7.858e-02, 2.946e-01, -9.622e-02, -4.151e-02, -3.780e-02, -1.132e-01, 1.453e-01, 1.965e-02, -3.571e-02, 9.255e-02, -1.576e-02));
	r += mul(s4_1, M4(-6.419e-02, 1.816e-02, -1.188e-01, -1.148e-01, -4.849e-02, 2.576e-03, 5.385e-01, 6.037e-03, 4.484e-02, -1.937e-02, -4.330e-01, -7.164e-02, -8.341e-02, 3.394e-02, -6.566e-02, 2.859e-01));
	r += mul(s4_2, M4(1.414e-01, 9.715e-03, -2.075e-02, 3.829e-02, -8.767e-02, -9.468e-02, 3.312e-01, -6.282e-03, 3.118e-02, 1.290e-02, -7.580e-03, 9.585e-02, 1.152e-01, -7.785e-02, 8.913e-02, 1.269e-01));
	r += mul(s4_3, M4(-1.300e-01, -8.615e-02, 2.134e-02, -2.515e-02, 4.651e-02, 2.008e-01, -6.371e-02, -1.236e-02, 2.614e-01, -1.788e-01, -1.925e-02, -1.645e-02, -1.069e-01, -6.968e-02, 6.480e-02, 3.090e-02));
	r += mul(s4_4, M4(1.352e-01, -1.886e-02, -1.145e-01, 4.115e-02, -3.652e-01, -8.227e-02, 5.339e-02, -2.728e-01, 1.191e-01, 2.115e-02, 2.001e-01, -1.219e-02, -5.189e-01, -9.690e-03, 9.258e-02, -7.153e-02));
	r += mul(s4_5, M4(-3.574e-02, 6.680e-02, -2.589e-02, -3.711e-02, -1.948e-01, 8.654e-02, 2.185e-01, -1.511e-01, -9.703e-02, 1.522e-01, -2.773e-01, 1.940e-01, -7.396e-02, 1.386e-01, 2.249e-01, -3.014e-02));
	r += mul(s4_6, M4(-1.586e-01, -3.006e-02, 4.357e-02, 3.494e-02, 1.060e-01, 6.097e-02, -4.613e-02, 1.076e-01, 1.332e-01, 1.145e-01, -5.582e-02, 1.621e-01, -1.310e-02, -2.543e-02, -1.657e-02, -2.863e-02));
	r += mul(s4_7, M4(1.932e-01, 6.256e-02, 1.217e-01, -8.493e-02, -4.115e-01, 1.171e-03, 1.135e-01, -7.224e-02, -1.774e-02, 9.645e-02, -1.115e-01, 6.669e-02, -5.159e-02, 6.090e-02, -4.863e-02, -1.897e-02));
	r += mul(s4_8, M4(-7.878e-02, -9.714e-02, 1.048e-01, 9.664e-03, 8.354e-02, 2.088e-02, 2.220e-02, 2.531e-02, -6.113e-02, 3.972e-02, -9.207e-02, 4.613e-02, 1.827e-01, 7.274e-02, -2.476e-01, -5.481e-02));
	r += mul(s5_0, M4(1.970e-01, 1.672e-01, 2.433e-01, -8.995e-02, 7.859e-02, -3.751e-02, -3.454e-02, 2.298e-01, -1.324e-01, -1.465e-02, -8.560e-02, -2.178e-01, -3.343e-02, 6.746e-02, 8.993e-02, -4.901e-02));
	r += mul(s5_1, M4(-6.850e-03, -1.971e-01, -5.692e-02, 7.432e-02, 3.548e-01, -1.108e-01, 3.175e-01, 1.081e+00, 5.009e-03, -4.461e-02, 2.832e-02, -2.137e-01, 2.470e-01, 5.725e-02, -1.697e-02, -1.859e-02));
	r += mul(s5_2, M4(5.239e-02, 6.549e-02, -2.298e-01, -1.966e-01, -9.228e-02, -1.393e-01, 1.912e-01, 4.543e-01, -1.425e-02, 9.029e-02, -9.371e-02, -2.144e-01, 7.205e-02, 2.581e-02, -1.731e-01, -2.638e-01));
	r += mul(s5_3, M4(9.616e-02, 1.156e-01, -4.901e-02, 3.704e-02, -1.969e-02, 3.668e-02, -7.894e-02, 4.214e-03, -1.831e-01, 2.568e-02, 1.322e-02, 4.639e-02, -7.497e-02, 6.491e-02, -5.406e-03, -1.238e-01));
	r += mul(s5_4, M4(-1.216e-01, 2.233e-01, -8.263e-02, -1.517e-01, 2.918e-01, -5.018e-02, -1.391e-01, 2.899e-02, -1.208e-01, -1.490e-01, -2.105e-01, -5.076e-02, -8.915e-03, 4.420e-03, -3.801e-01, -5.936e-03));
	r += mul(s5_5, M4(8.879e-02, 2.007e-01, -2.374e-02, 4.744e-01, -2.168e-01, 8.071e-02, 6.015e-03, 8.826e-02, 1.548e-01, -1.410e-01, 1.649e-01, -1.502e-01, 4.645e-03, 8.603e-02, 4.699e-02, 1.293e-02));
	r += mul(s5_6, M4(2.715e-01, 2.036e-01, -1.004e-01, 2.508e-01, 5.107e-02, 1.315e-01, 6.567e-02, 6.999e-02, 1.513e-02, -1.198e-01, -3.505e-02, 1.060e-01, 8.095e-02, -3.236e-03, 2.448e-01, 7.195e-03));
	r += mul(s5_7, M4(-7.150e-01, 2.499e-01, -3.129e-01, 2.092e-01, 2.434e-02, -6.082e-03, -5.837e-02, 2.251e-02, -1.022e-01, -2.280e-02, -2.848e-01, -1.327e-02, 4.425e-02, 6.298e-02, 1.132e-02, -9.445e-02));
	r += mul(s5_8, M4(-5.568e-02, -6.042e-02, 1.757e-01, -5.052e-02, -5.824e-02, 6.717e-03, -8.544e-02, -1.293e-02, -8.259e-02, 5.506e-02, 9.705e-02, 9.304e-02, -1.503e-01, -8.160e-02, 1.082e-01, -1.377e-01));
	r += mul(s6_0, M4(3.476e-01, 9.336e-02, 2.231e-02, 1.022e-02, -3.424e-02, -7.095e-02, 3.900e-02, 9.158e-02, -1.061e-01, -5.546e-02, -9.304e-02, -5.883e-02, 1.331e-01, 7.858e-02, 1.167e-01, 7.884e-02));
	r += mul(s6_1, M4(-1.835e-02, -1.974e-01, -1.369e-01, 2.903e-01, -1.457e-02, -2.445e-02, 1.877e-02, 1.939e-01, -9.732e-02, 3.776e-02, -1.752e-01, -2.629e-01, 2.034e-01, -6.453e-02, 2.623e-01, -2.109e-01));
	r += mul(s6_2, M4(-3.623e-02, 5.361e-02, -2.681e-01, 1.447e-01, -3.448e-02, 4.750e-02, 9.537e-02, 1.124e-03, 1.402e-01, 1.087e-01, -1.424e-01, -1.509e-01, 3.056e-02, -5.132e-02, -5.697e-03, -1.327e-02));
	r += mul(s6_3, M4(-4.655e-02, -4.782e-02, 2.343e-01, 8.769e-02, 1.124e-01, -2.491e-03, -2.597e-02, -6.833e-02, -1.243e-01, 4.811e-02, -3.578e-01, 2.454e-02, -1.434e-01, 5.503e-03, -6.405e-02, 2.061e-02));
	r += mul(s6_4, M4(-2.463e-01, -8.253e-02, 3.349e-01, -2.741e-01, -1.254e-02, 3.522e-02, -1.978e-01, 1.421e-03, -1.022e-02, 1.297e-02, -9.613e-02, -2.839e-01, 1.682e-01, 3.789e-02, -1.225e-01, -1.144e-01));
	r += mul(s6_5, M4(-1.381e-01, 1.578e-01, 1.174e-01, 9.555e-02, 1.071e-01, -1.781e-02, 4.394e-02, 3.225e-02, 1.769e-01, -1.736e-01, -3.724e-01, -4.223e-01, -1.162e-01, 3.163e-02, -5.523e-02, -9.306e-02));
	r += mul(s6_6, M4(5.363e-02, 2.286e-02, -1.182e-01, 2.210e-01, -7.831e-02, 3.203e-03, 7.760e-02, 3.020e-03, 2.411e-01, -5.532e-02, -2.875e-02, 1.754e-01, 1.305e-01, -2.458e-03, 1.417e-01, 6.157e-03));
	r += mul(s6_7, M4(-1.955e-01, -1.845e-01, 8.119e-02, -1.347e-01, -3.374e-02, 1.839e-02, 3.874e-03, -1.838e-01, -4.347e-01, -2.846e-01, -1.806e-01, 5.604e-02, 1.243e-01, -2.117e-02, -5.486e-02, 6.884e-03));
	r += mul(s6_8, M4(7.283e-02, -1.282e-01, -2.823e-01, 5.028e-02, -1.353e-01, 1.419e-01, -5.407e-02, -6.650e-03, 3.999e-01, -1.855e-01, -2.906e-01, -3.688e-02, -1.405e-01, 1.427e-02, -2.796e-02, -4.579e-02));
	r += mul(s7_0, M4(9.975e-02, 7.898e-02, 1.381e-01, 8.796e-02, 2.603e-01, -8.414e-02, -1.423e-01, -9.692e-03, -1.139e-01, 6.085e-02, -3.823e-03, -7.671e-02, 1.877e-02, -8.808e-03, 2.768e-02, 2.780e-01));
	r += mul(s7_1, M4(8.771e-02, 1.460e-02, 1.628e-02, 5.329e-03, 1.449e-01, -3.699e-01, 3.923e-01, 2.301e-02, -1.277e-01, -2.741e-02, -1.047e-01, 5.968e-02, -5.592e-02, 7.473e-02, 2.236e-01, 6.206e-01));
	r += mul(s7_2, M4(-1.469e-01, -7.466e-02, -5.950e-02, -1.130e-01, 2.839e-02, -2.606e-02, 1.334e-01, 1.732e-01, 3.303e-02, -9.745e-02, -1.245e-01, 6.786e-02, 1.955e-01, -5.324e-02, 2.093e-01, 1.236e-01));
	r += mul(s7_3, M4(-1.293e-01, -2.116e-02, -3.673e-02, -9.411e-02, -5.480e-01, 1.889e-01, 7.690e-02, 1.970e-01, -4.543e-02, 5.556e-02, -6.816e-02, 1.532e-02, -2.292e-03, -1.880e-02, 7.113e-02, 2.672e-02));
	r += mul(s7_4, M4(6.920e-02, -4.618e-02, -6.882e-02, 3.691e-02, -8.673e-02, -1.959e-01, -3.585e-01, -9.090e-02, 2.925e-01, 3.724e-02, 2.743e-01, 3.540e-01, -2.959e-01, -3.552e-02, -4.064e-02, -1.069e-01));
	r += mul(s7_5, M4(-3.202e-02, 2.347e-02, 7.362e-02, 2.157e-02, -7.213e-02, -1.073e-01, 2.008e-01, -2.052e-01, -1.134e-01, -2.505e-02, 1.241e-02, 1.802e-01, -1.081e-01, 5.897e-02, 1.798e-02, 7.188e-02));
	r += mul(s7_6, M4(-5.356e-04, 6.207e-02, 8.194e-02, 8.600e-03, -2.790e-01, 2.941e-01, -4.157e-02, -2.512e-01, 8.039e-02, -1.770e-02, -5.099e-02, 3.033e-02, -8.296e-02, -2.268e-02, -1.175e-01, 1.419e-02));
	r += mul(s7_7, M4(-4.728e-03, -4.249e-02, -9.443e-02, 2.155e-03, -1.180e-01, -1.111e-01, 5.674e-02, 1.564e-01, 3.111e-01, 4.206e-02, 1.174e-01, 2.935e-02, 5.181e-02, -9.064e-02, 8.466e-02, -9.002e-03));
	r += mul(s7_8, M4(1.323e-01, 8.600e-02, -3.627e-02, 3.855e-02, -5.842e-02, 3.473e-01, -2.465e-02, -1.118e-01, -6.674e-02, 4.149e-03, 6.115e-04, 4.737e-02, 1.022e-01, 4.757e-02, 2.210e-02, -5.416e-02));
	r += V4(-3.625e-03, -6.509e-02, -2.543e-02, 2.655e-02);
	return r;
}

// Pass 2 entry point: handles one texel per invocation. It gathers a 3x3
// neighbourhood from each of four sources (via l0..l3), splits every sample into
// its non-negative and non-positive parts, and stores the results of f0..f3
// into t4..t7 at that texel.
//
// blockStart - added to the per-thread offset to form the texel coordinate
//              (presumably the origin of this thread group's tile — confirm
//              against the Magpie runtime).
// tid        - only tid.x is read; Rmp8x8 turns it into a 2D offset
//              (presumably inside an 8x8 tile, going by the name — confirm).
void Pass2(uint2 blockStart, uint3 tid) {
	// `pt` and `pos` must keep exactly these names: the O() sampling macro
	// expands to an expression that refers to them. The lN macros used below
	// presumably go through O() as they do in the other passes.
	float2 pt = float2(GetInputPt());
	uint2 gxy = Rmp8x8(tid.x) + blockStart;
	uint2 size = GetInputSize();
	// Threads whose texel lies outside the input size do nothing.
	if (gxy.x >= size.x || gxy.y >= size.y) {
		return;
	}
	// Centre of texel gxy, scaled by pt.
	float2 pos = (gxy + 0.5) * pt;

	// Source 0: 3x3 taps in row-major order, offsets (-1,-1) .. (1,1).
	V4 s0_0 = l0(-1.0, -1.0);
	V4 s0_1 = l0(0.0, -1.0);
	V4 s0_2 = l0(1.0, -1.0);
	V4 s0_3 = l0(-1.0, 0.0);
	V4 s0_4 = l0(0.0, 0.0);
	V4 s0_5 = l0(1.0, 0.0);
	V4 s0_6 = l0(-1.0, 1.0);
	V4 s0_7 = l0(0.0, 1.0);
	V4 s0_8 = l0(1.0, 1.0);
	// s1_* = non-positive part of each tap (same value as min(s, 0)).
	// This must be taken before s0_* is overwritten below.
	V4 s1_0 = -max(-s0_0, 0.0);
	V4 s1_1 = -max(-s0_1, 0.0);
	V4 s1_2 = -max(-s0_2, 0.0);
	V4 s1_3 = -max(-s0_3, 0.0);
	V4 s1_4 = -max(-s0_4, 0.0);
	V4 s1_5 = -max(-s0_5, 0.0);
	V4 s1_6 = -max(-s0_6, 0.0);
	V4 s1_7 = -max(-s0_7, 0.0);
	V4 s1_8 = -max(-s0_8, 0.0);
	// s0_* now keeps only the non-negative part of each tap.
	s0_0 = max(s0_0, 0.0);
	s0_1 = max(s0_1, 0.0);
	s0_2 = max(s0_2, 0.0);
	s0_3 = max(s0_3, 0.0);
	s0_4 = max(s0_4, 0.0);
	s0_5 = max(s0_5, 0.0);
	s0_6 = max(s0_6, 0.0);
	s0_7 = max(s0_7, 0.0);
	s0_8 = max(s0_8, 0.0);

	// Source 1: same gather and split; s2_* = non-negative, s3_* = non-positive.
	V4 s2_0 = l1(-1.0, -1.0);
	V4 s2_1 = l1(0.0, -1.0);
	V4 s2_2 = l1(1.0, -1.0);
	V4 s2_3 = l1(-1.0, 0.0);
	V4 s2_4 = l1(0.0, 0.0);
	V4 s2_5 = l1(1.0, 0.0);
	V4 s2_6 = l1(-1.0, 1.0);
	V4 s2_7 = l1(0.0, 1.0);
	V4 s2_8 = l1(1.0, 1.0);
	V4 s3_0 = -max(-s2_0, 0.0);
	V4 s3_1 = -max(-s2_1, 0.0);
	V4 s3_2 = -max(-s2_2, 0.0);
	V4 s3_3 = -max(-s2_3, 0.0);
	V4 s3_4 = -max(-s2_4, 0.0);
	V4 s3_5 = -max(-s2_5, 0.0);
	V4 s3_6 = -max(-s2_6, 0.0);
	V4 s3_7 = -max(-s2_7, 0.0);
	V4 s3_8 = -max(-s2_8, 0.0);
	s2_0 = max(s2_0, 0.0);
	s2_1 = max(s2_1, 0.0);
	s2_2 = max(s2_2, 0.0);
	s2_3 = max(s2_3, 0.0);
	s2_4 = max(s2_4, 0.0);
	s2_5 = max(s2_5, 0.0);
	s2_6 = max(s2_6, 0.0);
	s2_7 = max(s2_7, 0.0);
	s2_8 = max(s2_8, 0.0);

	// Source 2: s4_* = non-negative, s5_* = non-positive.
	V4 s4_0 = l2(-1.0, -1.0);
	V4 s4_1 = l2(0.0, -1.0);
	V4 s4_2 = l2(1.0, -1.0);
	V4 s4_3 = l2(-1.0, 0.0);
	V4 s4_4 = l2(0.0, 0.0);
	V4 s4_5 = l2(1.0, 0.0);
	V4 s4_6 = l2(-1.0, 1.0);
	V4 s4_7 = l2(0.0, 1.0);
	V4 s4_8 = l2(1.0, 1.0);
	V4 s5_0 = -max(-s4_0, 0.0);
	V4 s5_1 = -max(-s4_1, 0.0);
	V4 s5_2 = -max(-s4_2, 0.0);
	V4 s5_3 = -max(-s4_3, 0.0);
	V4 s5_4 = -max(-s4_4, 0.0);
	V4 s5_5 = -max(-s4_5, 0.0);
	V4 s5_6 = -max(-s4_6, 0.0);
	V4 s5_7 = -max(-s4_7, 0.0);
	V4 s5_8 = -max(-s4_8, 0.0);
	s4_0 = max(s4_0, 0.0);
	s4_1 = max(s4_1, 0.0);
	s4_2 = max(s4_2, 0.0);
	s4_3 = max(s4_3, 0.0);
	s4_4 = max(s4_4, 0.0);
	s4_5 = max(s4_5, 0.0);
	s4_6 = max(s4_6, 0.0);
	s4_7 = max(s4_7, 0.0);
	s4_8 = max(s4_8, 0.0);

	// Source 3: s6_* = non-negative, s7_* = non-positive.
	V4 s6_0 = l3(-1.0, -1.0);
	V4 s6_1 = l3(0.0, -1.0);
	V4 s6_2 = l3(1.0, -1.0);
	V4 s6_3 = l3(-1.0, 0.0);
	V4 s6_4 = l3(0.0, 0.0);
	V4 s6_5 = l3(1.0, 0.0);
	V4 s6_6 = l3(-1.0, 1.0);
	V4 s6_7 = l3(0.0, 1.0);
	V4 s6_8 = l3(1.0, 1.0);
	V4 s7_0 = -max(-s6_0, 0.0);
	V4 s7_1 = -max(-s6_1, 0.0);
	V4 s7_2 = -max(-s6_2, 0.0);
	V4 s7_3 = -max(-s6_3, 0.0);
	V4 s7_4 = -max(-s6_4, 0.0);
	V4 s7_5 = -max(-s6_5, 0.0);
	V4 s7_6 = -max(-s6_6, 0.0);
	V4 s7_7 = -max(-s6_7, 0.0);
	V4 s7_8 = -max(-s6_8, 0.0);
	s6_0 = max(s6_0, 0.0);
	s6_1 = max(s6_1, 0.0);
	s6_2 = max(s6_2, 0.0);
	s6_3 = max(s6_3, 0.0);
	s6_4 = max(s6_4, 0.0);
	s6_5 = max(s6_5, 0.0);
	s6_6 = max(s6_6, 0.0);
	s6_7 = max(s6_7, 0.0);
	s6_8 = max(s6_8, 0.0);

	// Each of f0..f3 receives all 72 split taps and yields one V4, which is
	// written to the matching output texture at gxy.
	t4[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8);
	t5[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8);
	t6[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8);
	t7[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8);
}

//!PASS 3
//!DESC conv2
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN t4, t5, t6, t7
//!OUT t0, t1, t2, t3

// Pass 3 taps: lN(x, y) reads texture t4..t7 (for N = 0..3) at pos + (x, y) * pt
// through O() (point sampler SP, mip level 0) and converts the result to V4.
// O() refers to locals named `pos` and `pt`, so both must be in scope wherever
// these macros are expanded.
// NOTE(review): l0 is also defined for earlier passes without an #undef here;
// presumably each //!PASS section is compiled on its own — confirm.
#define l0(x, y) V4(O(t4, float2(x, y)))
#define l1(x, y) V4(O(t5, float2(x, y)))
#define l2(x, y) V4(O(t6, float2(x, y)))
#define l3(x, y) V4(O(t7, float2(x, y)))

// Pass 3, function f0: returns one V4 equal to the sum of 72 mul(vector, 4x4
// matrix) products — one constant matrix per argument, taken in argument
// order — plus a constant offset vector added last.
//
// Arguments are named sK_J with K = 0..7 and J = 0..8.
// NOTE(review): the Pass 3 caller is outside this view; if it mirrors Pass2,
// J is the row-major 3x3 tap index and each even/odd K pair carries the
// non-negative / non-positive parts of one source texture — confirm.
// The constants are presumably generated (trained) weights; do not hand-edit.
// Keep the statement order: V4 is min16float4, so reordering the accumulation
// can change rounding.
V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) {
	V4 r = 0.0;
	r += mul(s0_0, M4(4.835e-02, -2.396e-02, 1.538e-01, -1.119e-02, -7.433e-03, 2.244e-02, -4.176e-01, 3.162e-02, 2.272e-02, 2.109e-02, 2.717e-02, 1.403e-01, 4.632e-01, 8.008e-02, -3.298e-01, -1.300e-01));
	r += mul(s0_1, M4(-7.625e-02, -1.928e-02, -3.921e-02, -5.083e-02, -1.915e-01, 1.015e-01, -4.724e-01, 5.635e-02, -1.024e-02, 9.931e-03, -8.880e-02, 4.998e-02, 6.319e-01, -1.827e-01, 1.566e-01, -3.693e-01));
	r += mul(s0_2, M4(-6.468e-02, -1.760e-01, -7.115e-02, -4.285e-02, -2.285e-01, 8.701e-02, -2.009e-01, -1.627e-01, -8.255e-02, -5.679e-02, 5.767e-03, 2.168e-01, 4.349e-02, 3.930e-02, 4.913e-02, -1.117e-01));
	r += mul(s0_3, M4(1.302e-01, 5.135e-02, 4.496e-02, -3.394e-02, -7.374e-02, -4.251e-02, -5.037e-02, -2.096e-02, 3.509e-02, -1.396e-01, 1.593e-01, 6.313e-02, 9.144e-03, 6.799e-02, -2.578e-01, 1.379e-02));
	r += mul(s0_4, M4(-4.673e-02, -3.859e-02, 7.260e-03, -1.913e-01, 9.779e-02, -1.876e-02, -1.807e-01, -1.189e-02, -4.810e-02, 1.021e-01, 6.610e-02, 5.553e-02, -3.128e-02, 3.649e-01, 2.145e-02, 1.857e-01));
	r += mul(s0_5, M4(-6.326e-02, -2.190e-02, -1.025e-01, 2.645e-01, 1.325e-01, -3.209e-02, 3.893e-03, 1.114e-01, -1.186e-01, -2.977e-02, -8.407e-02, -2.533e-01, 1.688e-01, -2.146e-02, 2.017e-01, -2.172e-01));
	r += mul(s0_6, M4(-2.134e-02, 2.436e-03, 7.637e-02, 1.031e-02, 1.962e-02, -2.824e-02, -1.031e-01, -1.505e-01, 4.045e-02, -3.927e-02, 4.998e-02, -1.292e-01, -7.246e-03, 1.795e-03, 7.585e-02, 4.875e-02));
	r += mul(s0_7, M4(-2.179e-02, -3.530e-02, -1.362e-01, -2.993e-02, 1.283e-01, 1.468e-02, 1.005e-01, 3.694e-01, 4.319e-02, 7.983e-02, 2.198e-02, -4.084e-02, -1.126e-02, 3.306e-02, 5.772e-02, -1.415e-01));
	r += mul(s0_8, M4(4.490e-02, -4.903e-02, 3.056e-02, 4.978e-02, -1.774e-02, -1.850e-02, 1.111e-01, -1.493e-02, 4.528e-02, 7.043e-02, 7.131e-02, 1.664e-01, 7.829e-02, -1.444e-02, 4.841e-02, -4.486e-02));
	r += mul(s1_0, M4(-3.755e-02, -4.232e-02, -5.924e-02, 6.035e-02, -1.205e-01, 1.010e-02, 2.912e-02, 1.513e-01, -8.444e-02, -4.351e-02, -5.507e-02, 9.106e-02, -2.273e-03, -5.590e-02, 8.234e-02, -8.793e-02));
	r += mul(s1_1, M4(1.390e-01, -5.847e-02, -1.205e-01, 3.187e-01, -2.568e-01, 3.647e-02, -4.785e-02, 1.482e-01, 8.543e-02, 1.616e-01, -1.851e-01, 5.716e-02, -4.927e-01, -1.209e-01, 3.762e-02, 1.529e-01));
	r += mul(s1_2, M4(-1.530e-01, -1.645e-01, -1.839e-02, 2.038e-01, 8.178e-02, 1.021e-02, 6.465e-02, 7.910e-02, 2.042e-02, -1.093e-01, 2.834e-03, -3.437e-02, -1.692e-01, -8.842e-02, 1.308e-01, 2.947e-01));
	r += mul(s1_3, M4(7.424e-02, -1.654e-01, -1.246e-01, 1.160e-01, 1.581e-01, -6.440e-03, -8.087e-03, 6.360e-02, 1.111e-01, 2.032e-02, 2.760e-02, 2.297e-01, -2.736e-02, 1.178e-02, 4.567e-02, 6.139e-02));
	r += mul(s1_4, M4(3.265e-01, -5.441e-01, 9.085e-02, -1.616e-01, -1.701e-02, -4.155e-02, 4.148e-02, -3.896e-01, 2.620e-01, 1.954e-01, 1.190e-01, -1.761e-01, -3.432e-01, 1.121e-01, -2.782e-01, 2.503e-01));
	r += mul(s1_5, M4(-3.005e-01, -3.291e-01, -2.238e-02, -3.021e-02, -1.015e-02, -7.567e-02, -1.607e-01, -5.375e-02, 1.436e-01, 1.396e-02, 9.834e-02, -1.777e-01, -1.109e-01, -9.568e-02, 4.459e-02, 2.166e-01));
	r += mul(s1_6, M4(-1.383e-02, -1.195e-01, -7.240e-02, -7.391e-02, 7.793e-02, -8.250e-04, 2.773e-02, -9.196e-02, 1.148e-01, 3.961e-02, 3.218e-02, -5.892e-02, -1.103e-02, 6.608e-02, -9.241e-03, 9.855e-02));
	r += mul(s1_7, M4(-2.428e-01, -2.303e-01, 2.808e-04, -2.284e-01, 1.944e-01, 7.671e-02, 1.247e-01, 2.123e-01, -6.229e-02, 3.342e-01, -4.740e-02, 1.074e-01, -9.819e-02, -3.941e-02, -3.164e-02, -4.836e-02));
	r += mul(s1_8, M4(1.763e-01, -1.885e-01, -5.378e-02, 2.569e-01, 5.091e-02, -2.026e-02, 1.619e-01, -4.407e-02, 5.194e-02, 5.987e-02, -6.887e-02, 2.649e-01, -8.527e-02, -2.827e-03, 2.306e-02, -5.023e-02));
	r += mul(s2_0, M4(-8.205e-03, 4.433e-03, -1.343e-01, 2.500e-02, 1.505e-01, 1.889e-02, -1.210e-01, -8.672e-02, 3.076e-02, 1.498e-02, -2.644e-01, 5.938e-02, -8.131e-02, -2.713e-02, 1.534e-01, -1.360e-01));
	r += mul(s2_1, M4(-2.952e-02, -1.140e-01, 3.189e-02, 2.388e-01, -1.578e-01, -7.641e-03, -5.162e-02, -2.394e-01, 2.268e-02, -4.927e-02, 8.503e-02, -1.354e-01, 1.159e-01, -3.340e-02, 4.248e-02, -1.120e-01));
	r += mul(s2_2, M4(-8.455e-02, -9.497e-02, 5.362e-03, 1.827e-01, 1.962e-02, 4.699e-02, 8.229e-03, 4.554e-02, -7.800e-02, -6.534e-03, -5.066e-02, 3.068e-02, -1.612e-01, -1.929e-02, -1.645e-01, 1.841e-01));
	r += mul(s2_3, M4(-1.360e-01, -1.711e-02, -6.952e-02, -1.146e-01, 6.530e-02, 3.795e-02, 1.130e-01, 9.324e-02, 1.226e-01, 6.978e-02, 9.599e-02, -7.250e-03, 2.414e-02, 2.477e-02, 9.773e-02, 1.837e-01));
	r += mul(s2_4, M4(2.329e-01, -6.908e-02, -3.555e-03, -2.507e-01, -7.980e-02, -1.374e-02, -2.531e-01, 2.446e-01, -1.155e-01, -1.329e-01, -1.561e-01, 1.295e-01, -8.048e-02, -9.703e-02, 1.904e-01, 2.686e-02));
	r += mul(s2_5, M4(-7.302e-02, -5.162e-02, -1.045e-01, -1.587e-01, 1.160e-01, -3.129e-02, -2.348e-02, -1.675e-01, 6.703e-02, 8.933e-02, -9.654e-02, -1.748e-02, -1.317e-01, -2.547e-02, 3.693e-03, -4.579e-01));
	r += mul(s2_6, M4(-2.708e-02, 2.805e-02, 1.276e-01, 6.081e-02, 1.260e-01, -1.918e-02, -1.428e-02, 4.839e-02, 1.033e-02, -1.845e-02, 4.060e-02, -1.122e-01, -2.161e-01, 7.566e-02, 8.280e-02, 5.468e-02));
	r += mul(s2_7, M4(-5.334e-02, -2.162e-01, 1.007e-01, 2.634e-01, -1.227e-01, -2.969e-02, -9.729e-03, 3.114e-02, 4.496e-03, 1.649e-02, 1.208e-01, 7.892e-02, -7.813e-02, -6.307e-02, -1.427e-01, -5.671e-02));
	r += mul(s2_8, M4(-1.008e-01, -3.837e-02, -3.730e-02, 6.125e-02, 3.003e-02, -2.922e-02, -1.998e-02, -1.995e-03, 1.952e-02, -1.100e-02, -1.260e-01, 1.306e-01, 1.291e-01, 3.207e-02, 5.305e-02, 1.342e-01));
	r += mul(s3_0, M4(3.873e-03, -1.372e-02, -1.029e-01, 1.221e-01, -4.917e-01, 4.365e-02, -1.042e-01, -2.129e-02, -2.043e-02, 4.700e-02, -2.792e-01, 7.757e-03, 7.957e-02, 1.361e-02, 1.071e-01, 7.693e-02));
	r += mul(s3_1, M4(9.662e-02, -2.932e-02, 1.941e-01, -3.194e-01, -1.324e-01, -1.779e-02, -1.697e-01, -2.333e-01, 6.349e-02, -1.091e-01, 1.962e-01, 9.707e-02, 4.680e-02, -2.846e-02, 2.182e-01, 6.872e-02));
	r += mul(s3_2, M4(4.693e-02, -3.250e-03, -4.054e-02, -2.195e-01, 5.278e-02, 1.146e-01, -3.060e-01, -1.790e-01, 4.953e-02, 2.110e-02, -5.913e-02, -2.237e-01, 4.825e-02, -1.640e-02, -1.017e-02, 2.968e-03));
	r += mul(s3_3, M4(-6.447e-02, 3.180e-02, 2.896e-02, 1.189e-01, 2.795e-01, 3.655e-03, -6.887e-02, 5.190e-02, -6.438e-02, 9.903e-04, 2.467e-01, -1.479e-01, 1.112e-01, -2.545e-02, 6.097e-02, -8.142e-02));
	r += mul(s3_4, M4(1.924e-01, 3.832e-02, -3.986e-01, 9.887e-02, 5.257e-01, -1.760e-01, -1.260e-01, 1.574e-01, 6.137e-01, -1.438e-01, -4.044e-01, -2.024e-01, 4.315e-02, -4.846e-02, 9.769e-02, 8.331e-02));
	r += mul(s3_5, M4(-2.651e-01, 4.248e-02, -2.012e-02, 5.405e-01, 8.924e-03, -5.811e-02, -1.290e-02, 4.415e-01, -2.601e-02, -2.126e-02, -1.961e-02, 3.511e-01, 4.215e-02, -2.716e-02, 1.296e-01, -2.416e-01));
	r += mul(s3_6, M4(-9.102e-02, 7.787e-02, 8.254e-02, 1.284e-01, 2.343e-02, 7.932e-02, 1.647e-01, 7.651e-02, 6.076e-02, 1.102e-01, -4.897e-02, 1.291e-02, -5.505e-02, 4.130e-02, 5.113e-04, -1.506e-02));
	r += mul(s3_7, M4(1.474e-01, -5.158e-02, 1.400e-01, -7.421e-02, 3.513e-02, 1.671e-03, 2.964e-01, 1.693e-01, -1.401e-01, 3.910e-01, 2.678e-03, -1.492e-01, 9.331e-02, 2.221e-02, -2.008e-02, -1.650e-01));
	r += mul(s3_8, M4(-5.802e-02, 4.015e-03, -7.727e-02, -1.935e-01, 7.799e-03, -2.203e-02, -6.666e-02, -1.774e-01, 4.786e-03, 1.240e-01, -1.484e-01, 4.574e-02, -1.314e-01, 3.668e-02, -3.512e-03, 2.067e-01));
	r += mul(s4_0, M4(-3.480e-02, -3.356e-02, -4.895e-02, -5.957e-03, -4.391e-02, 2.161e-02, -8.893e-02, 4.473e-03, -3.188e-01, 3.568e-02, -2.502e-02, 5.069e-02, -6.971e-02, 1.207e-02, -8.822e-02, -1.065e-02));
	r += mul(s4_1, M4(-4.001e-02, -1.151e-03, -1.282e-01, -1.111e-01, 7.041e-03, -5.520e-02, -2.813e-01, 2.801e-01, -2.163e-02, 1.550e-01, 2.525e-02, 9.990e-02, 1.432e-02, -8.062e-02, 1.754e-01, 7.733e-02));
	r += mul(s4_2, M4(1.065e-01, 4.473e-02, -1.780e-02, -1.057e-01, 2.457e-01, -6.964e-02, 2.837e-01, 5.544e-02, 1.114e-01, 1.061e-01, -4.014e-03, -7.439e-02, 6.132e-02, 2.903e-02, -2.439e-02, 3.338e-02));
	r += mul(s4_3, M4(2.458e-01, 2.098e-02, -1.032e-01, -5.928e-02, 1.101e-01, 6.145e-02, 3.779e-03, -5.586e-03, -1.783e-01, -1.598e-01, -1.364e-01, 1.479e-01, -8.708e-03, -2.184e-02, -7.759e-02, 1.388e-02));
	r += mul(s4_4, M4(-2.294e-02, -5.229e-02, 9.278e-02, 3.531e-02, -4.284e-02, 1.636e-01, 2.907e-02, -1.274e-01, 1.585e-01, -5.364e-02, 1.031e-01, -2.084e-01, -1.381e-01, 9.829e-02, 6.917e-02, 1.736e-01));
	r += mul(s4_5, M4(7.420e-02, 1.156e-01, 2.054e-02, -8.936e-02, 4.535e-02, -4.649e-02, -3.211e-02, 2.075e-01, 1.284e-01, 1.272e-03, 1.120e-01, 1.890e-01, -2.611e-03, 5.461e-02, 2.135e-02, 2.503e-02));
	r += mul(s4_6, M4(-1.240e-01, -1.611e-02, 5.571e-02, 1.172e-01, 7.590e-02, -1.411e-02, -8.898e-02, 1.471e-01, -5.932e-02, 2.798e-02, -7.635e-02, -3.789e-02, -1.260e-01, 2.772e-02, 1.101e-01, -8.736e-02));
	r += mul(s4_7, M4(-1.001e-01, 1.539e-02, 7.691e-02, 3.275e-03, -1.396e-01, -8.471e-02, 5.078e-02, -3.117e-03, -4.415e-02, 1.229e-01, 8.650e-02, 1.605e-01, 2.221e-01, -7.118e-02, -1.555e-01, 3.037e-02));
	r += mul(s4_8, M4(4.324e-02, -7.434e-02, 1.425e-01, 2.127e-02, -1.275e-01, 1.402e-02, 4.531e-02, -1.775e-01, 7.208e-03, 1.470e-01, -9.997e-02, 4.179e-02, 3.099e-02, 3.236e-03, 1.256e-02, -5.362e-02));
	r += mul(s5_0, M4(-5.499e-02, 1.089e-02, 1.966e-01, 1.265e-01, 1.598e-02, -8.382e-03, -5.048e-02, -1.358e-01, 1.039e-01, 5.560e-02, 5.535e-02, -5.814e-02, -2.833e-01, -8.683e-03, -1.797e-02, 2.128e-02));
	r += mul(s5_1, M4(-3.021e-01, -1.367e-01, -6.594e-02, -2.379e-01, -1.311e-01, -1.704e-02, 1.063e-01, -1.499e-01, -2.149e-02, 1.256e-01, 3.517e-02, -1.273e-01, 5.299e-02, 4.324e-02, -4.636e-02, -1.730e-01));
	r += mul(s5_2, M4(1.915e-01, -3.671e-02, 7.262e-02, 3.138e-01, 8.891e-02, -1.213e-02, 6.398e-02, -1.219e-01, -1.356e-01, -9.526e-03, -2.735e-02, -7.145e-02, -2.606e-01, 2.359e-02, 1.698e-01, 1.522e-01));
	r += mul(s5_3, M4(-2.451e-01, 1.785e-02, 3.102e-01, 3.007e-02, 1.073e-01, -4.354e-02, -4.559e-03, -3.315e-02, -6.856e-02, -7.520e-02, 1.284e-03, -6.251e-02, 8.424e-02, 1.095e-02, -3.088e-01, -9.742e-02));
	r += mul(s5_4, M4(-1.551e-01, -1.494e-01, 2.153e-01, 1.393e-02, -2.496e-01, 2.217e-01, 1.903e-01, 7.574e-02, 1.421e-01, -1.086e-01, -4.961e-02, 1.004e-01, 1.392e-01, 3.209e-01, 1.138e-01, -2.434e-01));
	r += mul(s5_5, M4(-4.235e-01, 2.616e-02, 1.230e-01, -8.260e-02, 6.940e-02, -8.190e-02, -6.281e-03, -2.808e-01, 3.210e-02, -7.540e-02, 1.612e-01, 2.514e-01, 3.180e-02, 1.199e-01, -9.426e-02, 2.046e-01));
	r += mul(s5_6, M4(-3.787e-01, 1.389e-01, 3.114e-01, 1.714e-01, 5.573e-02, 3.739e-02, -8.473e-02, -4.610e-02, 9.574e-02, 2.153e-03, -8.866e-03, -4.410e-02, -1.155e-02, -5.400e-02, 1.925e-02, 1.738e-01));
	r += mul(s5_7, M4(-8.150e-02, -2.090e-01, -1.538e-01, 4.760e-01, -1.717e-01, 1.913e-02, 9.299e-03, 1.178e-02, 8.386e-02, -2.141e-02, -4.802e-02, -2.068e-01, 1.744e-01, -4.770e-02, -1.123e-01, 1.521e-01));
	r += mul(s5_8, M4(1.337e-01, -5.348e-02, 1.427e-02, -1.795e-02, 2.848e-03, -1.898e-02, -4.698e-03, -4.581e-02, -1.794e-01, 5.208e-02, 2.183e-02, 8.358e-02, -2.178e-01, -3.258e-03, -1.373e-01, 1.160e-01));
	r += mul(s6_0, M4(-2.858e-02, -5.607e-02, 6.122e-02, 1.521e-02, 1.714e-02, 1.630e-01, -1.331e-01, 7.024e-02, -6.379e-02, -2.672e-02, 2.664e-02, -1.155e-01, -3.388e-02, 4.736e-02, -8.130e-02, 3.151e-01));
	r += mul(s6_1, M4(-2.304e-01, 4.245e-02, -3.877e-01, 4.955e-02, -4.519e-02, 4.113e-02, 5.945e-02, 4.988e-02, 6.812e-02, 1.708e-02, 5.513e-02, 2.077e-01, 5.672e-03, 3.768e-02, -1.364e-02, 9.538e-02));
	r += mul(s6_2, M4(3.777e-01, 2.776e-03, 4.238e-01, 2.226e-01, 4.303e-02, 1.494e-01, -6.361e-02, 2.114e-02, 3.915e-02, -5.454e-02, -1.746e-02, -8.090e-02, -4.120e-02, -5.266e-03, -3.846e-02, 1.031e-01));
	r += mul(s6_3, M4(-1.792e-01, 7.010e-02, -3.349e-01, 1.673e-01, -9.938e-02, 1.228e-01, 4.501e-02, -3.223e-02, -1.085e-01, -2.602e-02, 3.112e-01, 1.934e-02, -4.332e-02, -1.565e-02, 2.233e-01, 2.114e-01));
	r += mul(s6_4, M4(3.164e-02, 1.312e-01, 2.002e-01, -8.395e-04, -6.305e-02, 1.137e-01, -8.620e-03, -1.526e-02, -3.245e-02, 2.534e-02, 3.431e-01, 2.682e-01, 6.176e-02, 7.767e-02, -1.683e-02, -1.062e-01));
	r += mul(s6_5, M4(3.983e-01, -1.263e-01, 1.939e-01, -4.347e-01, 1.405e-02, 1.783e-01, -9.961e-02, 3.092e-02, -1.693e-01, -2.893e-02, 2.976e-02, 8.765e-02, -8.958e-02, 6.622e-03, -1.051e-01, -2.510e-01));
	r += mul(s6_6, M4(3.025e-02, 4.474e-02, 8.671e-02, -1.723e-01, -3.552e-02, 6.098e-02, 6.620e-02, -6.662e-02, -2.257e-01, -6.825e-02, -6.809e-02, 8.563e-02, -3.061e-02, -7.433e-03, 9.983e-02, 1.045e-01));
	r += mul(s6_7, M4(2.197e-02, 1.158e-01, 1.172e-01, -8.142e-02, 8.136e-03, 1.074e-01, 8.381e-03, -8.652e-02, 1.715e-01, -9.235e-02, -4.468e-03, 2.780e-01, 1.224e-01, -4.897e-02, -1.505e-01, -3.873e-01));
	r += mul(s6_8, M4(1.077e-01, 9.874e-02, 2.784e-01, 1.056e-02, 1.047e-01, 3.006e-02, 7.689e-02, -7.230e-02, -7.157e-02, 2.606e-02, 1.841e-02, 7.986e-02, 2.093e-01, -7.551e-02, 1.089e-01, -2.931e-01));
	r += mul(s7_0, M4(-7.262e-02, -2.929e-02, 1.450e-01, -1.313e-01, -2.737e-02, 2.686e-01, 1.031e-01, -1.140e-01, 6.014e-02, -1.711e-02, -1.712e-01, 2.743e-01, -3.961e-03, 5.349e-02, 3.779e-02, -1.480e-01));
	r += mul(s7_1, M4(-2.523e-01, -8.444e-02, -5.685e-02, 1.679e-01, 1.718e-01, 3.372e-02, -1.776e-01, 3.977e-01, -1.106e-01, 4.569e-02, -3.599e-02, -6.396e-02, -5.179e-02, 1.339e-02, 7.202e-02, 8.055e-02));
	r += mul(s7_2, M4(1.646e-02, -4.254e-02, -2.996e-02, 2.612e-01, 3.034e-01, 2.075e-01, 5.784e-02, 1.962e-01, -4.796e-02, -1.175e-02, 1.115e-02, -4.677e-02, 1.267e-01, -9.842e-02, 5.732e-02, 4.389e-02));
	r += mul(s7_3, M4(-1.339e-01, 4.460e-02, -1.090e-01, 4.402e-02, -6.307e-01, 2.004e-01, 1.306e-01, -1.242e-01, 3.439e-01, 5.751e-02, 1.782e-01, -4.005e-02, 1.038e-02, 3.962e-02, -7.156e-02, -5.698e-02));
	r += mul(s7_4, M4(-5.161e-02, 1.104e-01, 1.064e-01, -2.529e-01, 1.336e-01, -1.512e-01, 2.080e-02, -3.020e-01, 1.244e-01, 1.446e-01, 2.897e-01, -3.604e-01, 3.560e-02, 2.879e-01, -6.541e-02, 4.722e-02));
	r += mul(s7_5, M4(-1.565e-02, -8.385e-02, -1.309e-01, -2.954e-01, 5.100e-03, 3.356e-01, -2.372e-01, -1.208e-01, -7.546e-02, 2.556e-02, 2.936e-02, -2.685e-01, 1.546e-01, 4.975e-02, -7.203e-02, 3.938e-02));
	r += mul(s7_6, M4(-9.647e-02, -2.487e-03, 1.106e-01, -1.505e-02, -1.015e-01, 1.383e-01, 1.683e-01, -5.457e-02, -4.709e-02, 2.964e-02, 1.209e-01, -1.821e-01, 1.030e-01, 8.563e-02, -3.546e-01, 1.567e-02));
	r += mul(s7_7, M4(5.586e-03, 7.714e-04, -8.230e-02, -7.812e-02, -2.193e-02, -9.724e-02, -2.037e-01, -2.090e-02, 1.396e-01, 1.463e-02, 5.520e-02, -2.476e-01, 9.839e-02, 2.303e-02, -8.032e-02, 1.211e-02));
	r += mul(s7_8, M4(4.894e-02, -2.312e-02, -1.515e-01, -7.387e-02, 2.279e-01, 1.422e-01, 1.680e-02, -1.466e-02, 9.482e-02, -2.726e-02, -3.037e-02, 2.144e-01, 1.412e-01, 2.566e-02, 1.389e-02, 2.099e-01));
	// Constant offset (bias) added after all products.
	r += V4(-7.067e-03, -2.480e-01, 1.181e-02, 1.629e-02);
	return r;
}

V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) {
	V4 r = 0.0;
	r += mul(s0_0, M4(-6.296e-02, 7.760e-03, 6.615e-03, -6.910e-02, -1.733e-01, -1.156e-01, -1.551e-01, -1.193e-01, 4.787e-02, 6.314e-03, -3.864e-02, -1.277e-01, -1.072e-01, -1.677e-01, -1.273e-01, -6.141e-01));
	r += mul(s0_1, M4(-1.401e-02, -7.855e-02, 7.498e-02, 1.439e-01, -1.983e-01, -1.254e-01, -6.861e-02, 2.587e-01, -1.967e-01, -4.699e-02, 1.141e-01, 1.117e-01, -1.657e-01, -1.043e-01, -1.024e-01, -3.792e-01));
	r += mul(s0_2, M4(-8.788e-02, 1.379e-02, 3.835e-02, -4.003e-02, 7.202e-02, -2.507e-02, -7.286e-03, 9.680e-02, 4.744e-02, 7.028e-02, 4.711e-02, 1.302e-01, 5.147e-02, -1.442e-01, -1.021e-01, -1.200e-01));
	r += mul(s0_3, M4(-1.092e-01, 3.675e-02, 1.100e-01, -1.764e-02, 1.332e-01, 3.322e-02, -1.338e-01, 3.050e-01, -1.125e-01, 1.422e-01, 3.435e-02, 5.083e-02, -1.082e-01, 3.614e-03, -1.721e-01, -2.248e-01));
	r += mul(s0_4, M4(-1.823e-01, -4.488e-02, 2.356e-01, 6.690e-03, 4.926e-02, -6.168e-02, -6.737e-02, 9.670e-02, 7.045e-02, -9.428e-02, -1.176e-01, 2.986e-01, -8.239e-03, -1.726e-01, -2.281e-01, 4.943e-02));
	r += mul(s0_5, M4(-2.979e-02, 1.098e-02, 1.388e-01, 1.658e-02, 8.264e-02, -1.971e-02, -7.715e-02, 3.450e-02, -7.950e-02, 4.570e-02, 4.495e-02, -3.240e-03, -2.056e-01, 8.439e-02, -1.198e-02, -8.534e-03));
	r += mul(s0_6, M4(2.039e-02, 1.046e-01, 1.343e-01, -1.218e-01, 1.207e-02, -9.623e-02, -7.417e-02, 9.335e-03, -3.468e-03, 9.762e-02, 9.744e-03, -4.961e-02, -7.627e-02, 9.029e-02, 1.106e-02, 4.004e-03));
	r += mul(s0_7, M4(8.910e-02, -1.111e-02, -9.261e-03, -1.198e-01, 5.008e-02, -1.737e-02, -2.231e-01, 9.532e-02, 1.245e-03, -3.073e-02, -5.711e-02, 6.196e-02, -1.358e-01, -1.777e-01, 1.823e-03, 6.634e-02));
	r += mul(s0_8, M4(2.888e-02, 1.446e-01, 1.182e-01, -2.587e-02, -7.939e-03, -6.873e-02, 9.312e-02, -7.450e-02, 1.244e-02, -1.426e-01, -1.923e-02, -9.536e-02, 1.318e-01, -4.630e-02, 1.188e-01, -5.019e-02));
	r += mul(s1_0, M4(8.971e-02, -9.082e-02, -7.461e-02, -2.907e-02, -1.107e-01, 1.039e-01, -8.184e-03, -1.111e-01, 2.382e-02, 1.451e-01, -4.172e-02, 1.012e-02, 2.358e-01, -1.155e-01, 3.239e-02, 7.585e-02));
	r += mul(s1_1, M4(2.872e-02, -2.363e-01, 7.393e-02, 2.592e-01, 2.485e-02, 4.569e-02, 8.531e-02, 1.181e-01, 2.343e-02, 1.434e-01, -1.593e-01, 5.882e-02, 3.096e-01, 4.042e-02, 6.920e-02, -3.214e-02));
	r += mul(s1_2, M4(-3.794e-02, -6.319e-03, 2.241e-01, 2.066e-02, -1.690e-01, 2.888e-02, -2.124e-02, 2.206e-02, 6.969e-02, 1.724e-01, -4.655e-02, 1.423e-01, 3.927e-02, 1.894e-02, 1.240e-01, 4.935e-02));
	r += mul(s1_3, M4(-4.410e-01, -4.775e-02, -2.330e-01, 2.230e-01, -2.839e-01, -6.997e-02, -1.015e-01, -1.054e-01, -2.014e-01, 2.071e-01, -1.688e-02, 1.301e-01, 2.427e-01, -4.421e-02, 8.016e-03, 4.136e-02));
	r += mul(s1_4, M4(-3.077e-01, -1.590e-01, -1.906e-01, 1.468e-01, 1.443e-02, -1.398e-01, -2.670e-02, -7.530e-02, 1.152e-01, 8.916e-02, -5.355e-02, 2.350e-01, 3.961e-01, 1.659e-01, -1.960e-01, 7.532e-02));
	r += mul(s1_5, M4(-2.561e-01, 1.781e-02, 1.265e-02, 1.338e-01, 8.716e-02, 2.390e-02, 9.098e-02, -8.604e-03, -2.280e-01, 2.041e-02, 1.755e-01, 1.053e-01, 9.669e-03, 2.135e-01, 1.511e-01, -3.904e-02));
	r += mul(s1_6, M4(1.296e-01, -1.541e-01, -2.282e-01, 1.621e-01, 1.033e-02, -6.657e-02, 1.396e-01, -7.432e-02, 2.574e-01, 2.337e-02, -9.461e-02, -5.584e-02, 6.284e-02, 1.940e-01, 1.195e-01, -1.045e-02));
	r += mul(s1_7, M4(2.040e-01, 1.778e-02, -3.518e-01, 3.646e-02, 4.260e-02, 2.341e-01, 1.879e-02, -6.436e-02, 1.185e-01, -2.253e-01, 7.939e-02, 1.112e-02, -5.558e-02, 1.451e-01, 4.042e-02, -7.908e-02));
	r += mul(s1_8, M4(-4.773e-02, 1.554e-01, -1.877e-01, -1.379e-01, -5.846e-02, -1.405e-02, 9.912e-02, -5.957e-02, 3.975e-01, -5.402e-02, -5.425e-02, 4.381e-02, -1.086e-01, 5.943e-02, 2.654e-02, 5.067e-02));
	r += mul(s2_0, M4(6.013e-03, -1.253e-01, 1.684e-01, 1.034e-02, 6.153e-03, -2.489e-02, 2.605e-02, -3.455e-02, -2.483e-02, -7.223e-02, -2.935e-02, -3.720e-02, -1.545e-01, 7.488e-02, 1.590e-01, -9.646e-02));
	r += mul(s2_1, M4(-1.124e-01, -2.540e-02, -1.007e-01, 3.891e-01, 9.111e-02, -6.169e-02, 4.814e-03, -2.303e-02, 1.628e-01, 4.082e-03, -1.690e-02, 3.356e-02, -2.616e-01, 3.774e-03, -2.280e-01, 2.459e-01));
	r += mul(s2_2, M4(3.480e-02, 6.542e-02, 3.395e-02, 1.568e-01, 1.253e-01, 1.658e-02, -1.394e-02, 9.313e-02, 1.123e-01, 3.798e-02, -7.904e-02, 5.355e-02, 1.717e-02, -7.496e-02, 1.604e-01, -1.738e-02));
	r += mul(s2_3, M4(-3.165e-03, -8.321e-02, -1.332e-01, -2.712e-02, 8.552e-02, -2.493e-01, -1.120e-01, 1.037e-03, 1.315e-01, -1.144e-01, -5.099e-02, 3.977e-02, 1.624e-01, -2.295e-01, 2.364e-01, 6.290e-02));
	r += mul(s2_4, M4(-9.325e-02, -1.038e-01, 1.666e-02, 1.585e-01, -7.056e-02, 1.631e-01, -3.784e-03, 1.019e-01, -1.726e-01, 3.181e-03, -9.122e-02, -7.055e-02, 5.812e-01, -2.316e-01, -3.565e-02, -7.114e-02));
	r += mul(s2_5, M4(-6.748e-02, -1.361e-01, -1.217e-01, -7.496e-02, 1.507e-03, -1.285e-01, 9.394e-02, 2.374e-02, -1.774e-01, -6.646e-02, 3.708e-02, -1.242e-02, 1.416e-01, -1.894e-01, -1.324e-01, 1.631e-01));
	r += mul(s2_6, M4(1.277e-01, 4.821e-02, 9.689e-02, 3.256e-02, -3.484e-02, -2.009e-02, -5.629e-02, -5.021e-02, 1.237e-01, -1.442e-02, -4.216e-03, 9.495e-02, -7.393e-02, 2.990e-02, 3.954e-01, 3.259e-02));
	r += mul(s2_7, M4(1.946e-01, 1.638e-01, 5.388e-02, -1.684e-02, 1.038e-01, 1.256e-02, 1.280e-01, -1.644e-04, 2.570e-01, -2.273e-01, 1.624e-01, -3.824e-02, 1.537e-01, -1.509e-01, -4.485e-02, -1.919e-02));
	r += mul(s2_8, M4(1.524e-02, 1.102e-01, 1.297e-01, -6.958e-02, 3.049e-02, -1.014e-01, -4.677e-03, 6.799e-02, 1.526e-01, -7.553e-02, 2.533e-03, 1.559e-01, -9.696e-03, -2.257e-01, 2.736e-01, -2.925e-01));
	r += mul(s3_0, M4(-5.678e-03, -9.975e-02, 5.608e-02, -1.878e-02, 1.492e-01, -1.788e-01, 6.868e-02, -3.736e-02, -9.419e-02, -1.721e-02, 1.413e-02, -1.774e-02, -7.584e-02, 6.618e-03, 2.523e-02, -6.888e-02));
	r += mul(s3_1, M4(9.743e-02, -5.011e-02, -1.033e-01, 1.148e-01, 7.509e-02, 1.483e-01, -2.218e-02, -2.814e-01, 2.120e-01, 6.953e-03, 7.592e-02, -2.436e-01, -1.137e-01, -2.710e-02, -8.729e-02, -4.858e-02));
	r += mul(s3_2, M4(5.880e-02, 2.609e-02, 4.146e-02, 2.087e-01, -1.517e-01, 9.844e-02, 5.635e-02, -2.144e-01, -8.840e-02, 1.367e-01, 2.265e-03, 1.798e-02, -8.372e-02, -1.541e-02, -8.325e-02, 6.236e-02));
	r += mul(s3_3, M4(1.284e-01, -5.165e-02, -1.073e-01, 1.566e-02, -8.156e-02, 9.434e-02, -1.572e-01, -2.248e-01, 1.289e-02, -2.447e-02, -1.321e-02, -1.364e-01, 1.252e-03, -2.467e-01, 9.000e-02, 1.592e-01));
	r += mul(s3_4, M4(3.242e-02, 8.790e-02, 6.500e-02, 1.169e-01, 1.310e-01, 5.295e-02, -3.770e-01, 2.659e-02, -1.715e-01, 7.007e-02, -3.154e-02, -1.503e-01, 1.687e-01, -1.812e-01, 1.072e-01, -5.560e-02));
	r += mul(s3_5, M4(5.164e-02, 2.774e-02, 3.806e-02, -1.682e-01, 2.807e-01, -1.499e-01, -1.471e-01, -1.294e-01, 3.739e-02, 1.806e-02, -3.975e-02, -7.185e-02, 1.505e-01, -7.103e-03, -1.306e-01, 1.362e-01));
	r += mul(s3_6, M4(1.583e-01, -2.523e-01, -9.988e-02, -4.831e-02, 1.392e-01, 2.817e-01, -2.417e-01, -1.503e-01, 2.149e-02, 4.919e-01, 7.501e-02, -4.918e-02, -3.924e-03, 2.290e-01, 5.465e-02, -3.692e-02));
	r += mul(s3_7, M4(-2.568e-01, 2.126e-01, -9.229e-02, -9.968e-02, 2.697e-01, -2.022e-01, 7.863e-02, -1.691e-01, -3.892e-01, 8.126e-02, 5.303e-01, -9.675e-02, -5.414e-02, -5.654e-02, -5.946e-02, -1.013e-01));
	r += mul(s3_8, M4(-4.412e-02, 8.399e-02, -2.984e-01, 1.498e-01, -1.460e-01, -1.423e-01, -2.728e-01, 7.422e-02, -1.358e-01, 5.904e-02, -9.015e-02, -2.758e-02, 5.924e-02, -1.556e-01, 5.550e-02, -4.398e-02));
	r += mul(s4_0, M4(-1.170e-01, -1.378e-01, -4.116e-02, 1.387e-01, 1.216e-01, 9.390e-02, 1.264e-01, -1.458e-01, 8.866e-02, -1.196e-01, 1.862e-01, 5.594e-01, 4.088e-02, -5.253e-03, -3.735e-02, 1.472e-01));
	r += mul(s4_1, M4(8.938e-03, -6.878e-03, -2.098e-02, -3.777e-02, 2.842e-01, 1.345e-01, 1.345e-02, -2.262e-02, -3.473e-02, -1.377e-02, -5.251e-02, -3.000e-01, 5.819e-02, 7.130e-03, 8.860e-02, -4.931e-02));
	r += mul(s4_2, M4(-7.974e-02, -1.470e-02, -1.071e-01, 6.803e-02, 2.739e-02, 1.840e-01, 1.857e-02, 1.595e-02, -9.594e-02, -2.005e-01, -1.699e-01, -6.428e-02, -1.115e-01, 2.762e-02, -1.109e-01, -5.929e-02));
	r += mul(s4_3, M4(-7.114e-02, 2.181e-02, -2.073e-01, -1.219e-02, 4.196e-02, 2.025e-01, 5.159e-02, 1.224e-01, -2.798e-01, -1.442e-01, 1.147e-02, 7.897e-02, -3.916e-03, 3.012e-02, -1.081e-01, 3.503e-03));
	r += mul(s4_4, M4(-3.617e-02, 1.201e-01, 5.453e-02, 1.035e-01, -2.327e-01, 1.920e-01, -6.373e-02, 2.633e-01, -1.994e-01, 1.184e-01, 1.997e-01, -8.347e-02, 6.505e-02, -1.572e-01, -3.503e-02, 1.064e-01));
	r += mul(s4_5, M4(-1.005e-01, -2.648e-02, -4.836e-02, 8.596e-02, 9.809e-02, 7.420e-02, 1.844e-01, -1.432e-01, 1.792e-01, -2.256e-01, -4.148e-01, 9.456e-02, 2.001e-02, -3.829e-02, -2.361e-01, -1.106e-01));
	r += mul(s4_6, M4(1.471e-02, 9.680e-02, 5.551e-02, -5.460e-02, 6.416e-02, 1.523e-02, -1.561e-01, -9.986e-02, 4.359e-02, -1.792e-01, 2.300e-01, 3.665e-02, -1.285e-02, -1.844e-01, -8.183e-02, -2.661e-02));
	r += mul(s4_7, M4(1.598e-02, -1.975e-02, 2.088e-01, -8.643e-02, 1.618e-01, 1.001e-01, -1.640e-01, -2.352e-02, 2.128e-01, -4.072e-04, -5.130e-01, 4.805e-04, -5.274e-02, 9.536e-02, -7.823e-02, 5.530e-02));
	r += mul(s4_8, M4(-2.983e-02, 4.106e-02, 4.312e-02, -8.918e-02, -1.063e-01, 2.148e-01, 1.454e-02, 7.327e-02, -2.582e-03, 2.259e-02, -3.194e-01, -1.971e-02, 2.798e-02, 5.562e-02, -1.398e-01, -6.819e-02));
	r += mul(s5_0, M4(4.513e-01, 4.676e-01, 5.226e-01, 1.764e-01, -9.800e-02, -7.946e-02, 2.521e-02, -4.449e-02, 6.989e-02, -8.352e-02, -4.870e-02, 3.570e-02, 4.503e-02, 2.162e-01, -9.362e-02, 3.516e-01));
	r += mul(s5_1, M4(-5.371e-02, 1.972e-01, -4.532e-02, 2.918e-01, -2.111e-01, -3.814e-02, 1.917e-01, -5.373e-03, 6.381e-02, 1.753e-02, 6.961e-02, 3.351e-02, 1.103e-01, -9.770e-02, -9.194e-02, -5.176e-02));
	r += mul(s5_2, M4(9.004e-02, -1.363e-02, -1.163e-02, -9.717e-02, -1.343e-01, -5.610e-02, -1.661e-01, 3.208e-02, 8.446e-02, -3.063e-02, 7.279e-02, -1.185e-01, 3.186e-01, 6.196e-02, 1.419e-01, 1.332e-01));
	r += mul(s5_3, M4(2.893e-01, -4.345e-02, 6.999e-02, -4.645e-02, 1.928e-01, -2.891e-01, -5.506e-02, -5.547e-02, 6.418e-02, 5.734e-02, 6.073e-03, 1.036e-01, 1.590e-01, -1.633e-01, -2.515e-02, -5.278e-02));
	r += mul(s5_4, M4(-1.224e-01, -2.496e-01, 4.529e-02, 3.488e-01, 1.504e-03, -4.240e-01, 2.477e-02, 4.511e-02, 5.403e-02, 1.711e-01, 1.764e-01, 3.557e-02, 1.015e-01, -3.583e-01, 3.328e-01, 7.640e-02));
	r += mul(s5_5, M4(-1.450e-01, -2.665e-01, -1.446e-01, 1.696e-01, -1.642e-01, -7.631e-02, -3.422e-03, -9.272e-02, -1.440e-02, 3.656e-02, 4.633e-02, 6.810e-02, 7.371e-02, 2.298e-01, 2.306e-01, -2.436e-01));
	r += mul(s5_6, M4(3.661e-01, 1.620e-01, 4.195e-01, -3.285e-01, 1.617e-02, -2.502e-01, 1.880e-02, 1.265e-01, -4.917e-02, 4.856e-02, 1.003e-01, -7.730e-02, -1.180e-01, -1.242e-01, 6.801e-03, 3.626e-03));
	r += mul(s5_7, M4(3.307e-01, -1.375e-01, -2.029e-01, 2.181e-01, 1.969e-01, -3.681e-01, 1.225e-01, -2.982e-02, 4.205e-02, 8.890e-02, -1.453e-01, -8.069e-02, 3.265e-01, -7.184e-02, 6.168e-02, -1.892e-01));
	r += mul(s5_8, M4(1.585e-01, 2.796e-01, 2.616e-01, 8.947e-02, 1.624e-01, -1.246e-01, -2.392e-01, 9.131e-02, -8.142e-02, -1.192e-02, 3.717e-02, -8.612e-02, 1.071e-01, -1.051e-01, 1.600e-01, 7.882e-02));
	r += mul(s6_0, M4(-5.474e-02, -1.008e-01, 1.636e-02, 1.844e-01, 1.248e-01, 3.317e-02, 1.378e-02, 3.465e-03, 7.883e-02, -1.977e-02, 1.255e-01, -1.549e-01, -7.282e-02, 1.778e-01, -5.500e-02, 9.860e-02));
	r += mul(s6_1, M4(2.543e-01, -2.954e-01, 4.462e-02, -9.544e-02, 1.528e-02, -9.023e-02, 1.001e-01, -1.330e-01, 6.335e-03, -2.947e-02, 5.462e-02, -1.493e-01, 4.117e-02, -6.603e-02, -5.900e-03, 3.771e-02));
	r += mul(s6_2, M4(6.381e-02, 2.838e-03, -3.115e-01, 9.896e-02, 9.216e-02, -2.670e-02, 6.396e-02, -2.471e-02, 3.709e-02, -6.857e-02, -3.678e-02, 4.962e-02, 8.385e-03, 1.931e-01, 1.366e-02, -1.669e-02));
	r += mul(s6_3, M4(8.992e-02, -8.330e-02, 2.828e-02, -1.335e-01, 4.957e-02, 7.820e-03, 7.989e-02, 6.431e-02, 1.280e-01, -2.863e-03, 1.777e-01, -1.335e-01, -5.924e-02, 1.229e-01, -7.804e-02, 1.636e-01));
	r += mul(s6_4, M4(-1.124e-01, -1.182e-01, 3.981e-02, 2.047e-01, -1.712e-02, -1.877e-02, 1.001e-01, -3.272e-02, -1.424e-01, -8.957e-02, -6.594e-02, -1.042e-02, -9.962e-02, 2.836e-02, 6.545e-02, 1.542e-01));
	r += mul(s6_5, M4(5.565e-03, 2.089e-01, -8.464e-02, 5.421e-02, -1.540e-01, -5.868e-02, 1.151e-01, -1.651e-01, -1.522e-03, 3.179e-02, -1.211e-02, 1.724e-02, 4.772e-02, -2.717e-01, -1.096e-03, -2.160e-01));
	r += mul(s6_6, M4(-1.562e-02, 1.106e-01, 3.543e-02, 7.591e-02, -3.997e-02, 6.842e-02, 1.129e-01, 1.381e-01, 1.876e-01, 4.092e-01, 1.209e-01, 6.818e-02, -1.437e-01, 1.191e-01, -1.169e-01, 8.674e-02));
	r += mul(s6_7, M4(-4.597e-02, -3.929e-02, -1.468e-02, 7.116e-02, -8.660e-02, 3.209e-02, 3.214e-02, -3.238e-02, -1.492e-01, 2.566e-02, 1.710e-01, -1.887e-01, -2.617e-01, 3.479e-01, -1.033e-01, 2.452e-01));
	r += mul(s6_8, M4(5.360e-01, -6.639e-02, 4.089e-02, -2.392e-02, -2.369e-01, 1.002e-01, 2.614e-02, -3.216e-02, -3.226e-02, 8.686e-02, 2.782e-02, 3.562e-02, 5.844e-02, 8.772e-02, 3.218e-01, -2.124e-03));
	r += mul(s7_0, M4(-4.048e-02, -9.997e-02, 8.137e-02, 2.078e-02, 1.268e-01, -1.598e-01, 3.866e-02, -1.074e-01, -2.593e-01, 9.528e-02, 9.960e-02, 5.756e-02, 1.358e-01, -5.139e-03, -1.316e-02, -1.092e-01));
	r += mul(s7_1, M4(-1.213e-01, 8.196e-02, -8.100e-02, 1.871e-01, 1.131e-01, 1.529e-01, -6.739e-03, -2.926e-01, 6.150e-02, -3.458e-02, 8.555e-02, -9.260e-02, 1.937e-02, -8.979e-02, 2.298e-02, 8.512e-02));
	r += mul(s7_2, M4(-8.931e-03, -3.090e-03, -1.055e-01, -9.030e-02, 8.746e-02, -9.688e-02, 9.056e-02, 8.719e-02, 1.805e-03, -6.690e-02, 3.257e-02, -1.213e-02, 1.109e-02, 2.994e-02, 2.223e-03, 1.758e-01));
	r += mul(s7_3, M4(-1.470e-01, -1.280e-02, 6.654e-02, 4.358e-02, 9.439e-02, 1.011e-01, 1.927e-01, 2.059e-01, -2.276e-01, 1.261e-01, -6.429e-02, -1.375e-02, 2.319e-01, -1.480e-01, -1.147e-01, -3.065e-01));
	r += mul(s7_4, M4(-3.930e-02, -2.527e-01, 2.501e-01, 8.167e-02, -1.693e-01, -1.588e-01, 2.412e-01, 4.158e-01, 1.166e-01, -1.097e-01, -1.071e-01, 1.972e-01, 2.167e-01, -5.103e-02, -2.525e-02, 4.689e-02));
	r += mul(s7_5, M4(-1.498e-01, 9.670e-02, -3.224e-02, -1.630e-01, -3.076e-01, 1.045e-02, 1.002e-01, -1.043e-01, 1.172e-01, 7.072e-04, -7.597e-02, 9.863e-03, -5.800e-02, -4.964e-02, -3.846e-02, -9.352e-04));
	r += mul(s7_6, M4(-1.372e-01, 1.598e-01, -1.029e-01, -7.709e-02, 4.725e-02, 5.317e-02, 4.150e-01, 7.688e-02, -2.923e-01, 2.772e-01, -3.063e-01, 7.638e-02, 1.667e-01, -1.784e-01, -5.216e-02, -2.791e-02));
	r += mul(s7_7, M4(-8.641e-02, 1.598e-01, 8.128e-02, -4.598e-02, 9.464e-02, 2.164e-01, 4.623e-01, 4.714e-02, -1.506e-01, 1.833e-01, -8.676e-02, 3.411e-01, -2.230e-02, -1.713e-01, 4.596e-02, -2.925e-02));
	r += mul(s7_8, M4(-9.475e-02, -3.959e-02, 7.701e-02, 4.111e-02, -4.562e-01, 2.156e-02, -2.381e-03, -2.020e-01, 8.663e-02, 5.398e-02, -1.718e-01, -3.074e-02, -5.906e-02, -4.721e-02, 2.176e-01, -8.696e-02));
	r += V4(7.251e-02, -4.031e-02, -3.769e-02, 6.364e-02);
	return r;
}

// f2: accumulates 72 vector-by-matrix products plus a constant offset into
// a single V4 (min16float4, see the V4/M4 defines in the COMMON section).
//
// Parameters: 72 V4 values named s<i>_<j>, with i in 0..7 and j in 0..8.
//   NOTE(review): presumably <i> selects one of eight feature inputs and <j>
//   one of nine taps of a 3x3 neighbourhood -- the call site is outside this
//   chunk, confirm there.
// Returns: the accumulated V4. No activation or clamping is applied inside
//   this function; the sum is returned as-is.
//
// Each term is mul(s, M4(...)) with the vector as the first operand, which
// HLSL evaluates with s treated as a row vector.
// The terms are added strictly in the order s0_0 .. s7_8. Because the
// accumulator is a min16float4, reordering the additions may change rounding,
// so keep the statement order as generated.
V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) {
	// Accumulator; the scalar 0.0 initialises all four components.
	V4 r = 0.0;
	// Inputs s0_*.
	r += mul(s0_0, M4(-1.698e-02, -1.873e-01, -6.680e-02, 4.658e-02, -1.640e-02, -4.673e-02, 2.303e-02, -7.395e-02, 9.013e-02, -2.312e-02, 7.911e-02, -3.101e-02, 1.765e-01, -3.523e-01, 2.891e-01, -1.342e-01));
	r += mul(s0_1, M4(-5.807e-02, 1.886e-01, 3.353e-03, -1.506e-02, -2.154e-01, -5.218e-03, 2.449e-01, -1.151e-01, 6.348e-02, -1.934e-02, -3.096e-02, 1.163e-02, 5.664e-01, -1.311e-01, 3.078e-01, 8.386e-02));
	r += mul(s0_2, M4(-4.766e-02, -3.748e-02, 1.156e-01, -1.899e-02, 2.700e-02, 7.951e-02, -1.855e-02, -1.360e-01, -9.049e-02, 1.629e-01, 5.111e-02, 2.138e-02, -4.326e-02, -6.232e-02, 1.730e-01, 9.890e-02));
	r += mul(s0_3, M4(-9.821e-02, -8.497e-04, 3.265e-02, 3.157e-02, 3.790e-02, 2.119e-01, -2.147e-01, 7.346e-02, 1.442e-01, -5.694e-02, -6.352e-02, 1.299e-01, 2.590e-01, 3.356e-02, 2.009e-01, 3.760e-01));
	r += mul(s0_4, M4(1.312e-01, 2.749e-01, 3.881e-02, 2.240e-01, 1.452e-01, -5.652e-02, 1.002e-01, 3.697e-03, -2.693e-01, -4.560e-02, -5.198e-02, -1.121e-01, -5.385e-02, -2.180e-01, 2.357e-01, -4.682e-02));
	r += mul(s0_5, M4(1.037e-01, -4.922e-03, -1.589e-02, 1.669e-01, 6.173e-02, 8.454e-03, 3.746e-01, 1.235e-01, -8.087e-02, -1.984e-02, 3.124e-02, -1.156e-01, -1.325e-01, 2.218e-01, 1.087e-01, 1.539e-01));
	r += mul(s0_6, M4(-5.949e-03, -2.015e-01, 7.017e-02, -9.461e-02, -1.481e-01, 1.249e-01, -1.170e-01, -1.190e-01, 6.665e-02, -7.714e-02, -3.005e-02, -1.891e-01, 5.926e-02, 2.404e-02, -2.855e-02, 4.209e-02));
	r += mul(s0_7, M4(1.168e-01, 1.383e-01, 4.428e-02, -2.288e-01, 9.044e-02, 2.164e-01, -8.071e-02, 6.872e-02, 1.572e-02, -4.685e-02, 2.427e-01, -2.775e-02, -4.994e-02, 3.562e-02, 4.875e-02, 1.291e-01));
	r += mul(s0_8, M4(4.041e-03, 2.384e-02, -1.436e-01, 7.285e-02, -9.456e-02, -2.413e-02, 1.093e-01, 1.186e-01, -3.203e-02, 1.475e-02, -1.489e-02, 2.053e-01, 5.281e-02, 4.876e-02, -6.375e-02, 1.727e-02));
	// Inputs s1_*.
	r += mul(s1_0, M4(-2.273e-02, 1.106e-01, -2.805e-01, 4.683e-02, 4.003e-02, 1.739e-03, -9.869e-02, -6.333e-02, -7.502e-02, 2.742e-01, 6.364e-02, -8.908e-02, 1.085e-01, 3.470e-01, -3.442e-01, -9.594e-02));
	r += mul(s1_1, M4(-2.592e-01, -2.718e-02, 2.159e-01, 5.279e-02, -7.593e-02, -5.258e-02, 1.568e-01, 1.135e-01, 1.196e-01, 1.640e-01, 2.071e-01, -3.588e-01, -1.379e-01, 1.784e-01, 6.214e-03, -5.460e-02));
	r += mul(s1_2, M4(-1.736e-02, 2.079e-01, 4.595e-03, -1.123e-01, -2.302e-02, 6.572e-02, -1.167e-01, 1.474e-02, -1.697e-03, -2.812e-01, 1.269e-02, -3.907e-03, -6.566e-02, 1.811e-01, 1.914e-01, -3.899e-02));
	r += mul(s1_3, M4(-1.516e-01, -1.128e-02, -1.515e-01, -5.396e-02, 3.382e-02, -4.851e-02, 5.555e-02, 1.203e-01, 7.104e-02, 2.457e-01, 3.103e-02, 7.618e-02, -1.235e-02, -6.414e-02, -1.932e-01, 2.398e-01));
	r += mul(s1_4, M4(9.719e-02, -2.010e-01, 1.421e-01, -3.102e-03, 4.764e-02, 1.152e-01, -1.954e-03, -2.084e-01, 1.056e-01, -7.074e-02, -2.112e-02, -1.057e-01, -1.572e-01, -4.169e-02, -6.214e-02, -7.277e-01));
	r += mul(s1_5, M4(8.558e-02, -2.495e-01, -1.989e-01, 9.590e-02, 1.615e-01, -1.643e-01, -1.051e-02, -9.111e-02, -6.694e-02, 1.960e-01, 3.509e-01, 2.216e-02, -1.859e-01, -3.016e-01, -1.452e-01, 1.759e-01));
	r += mul(s1_6, M4(1.043e-01, -1.302e-01, 3.582e-02, -2.599e-01, 9.496e-02, -1.366e-01, 3.558e-02, -1.240e-01, -8.862e-02, 1.433e-01, -2.149e-02, -1.374e-02, 2.642e-02, -2.283e-02, -2.794e-02, -2.896e-02));
	r += mul(s1_7, M4(1.294e-01, -1.096e-01, -1.064e-01, -2.245e-01, 8.796e-02, 9.860e-02, 2.812e-02, 1.750e-02, 1.139e-01, -1.873e-01, 2.878e-02, 1.735e-01, 1.740e-02, 1.851e-02, -2.990e-02, -1.145e-01));
	r += mul(s1_8, M4(-1.369e-01, -6.807e-02, -5.180e-02, 1.313e-01, -3.927e-05, -1.128e-01, 4.890e-02, 3.989e-02, -2.035e-01, 2.148e-01, -2.251e-01, 1.421e-01, -7.962e-03, 1.509e-01, -1.495e-02, -1.019e-01));
	// Inputs s2_*.
	r += mul(s2_0, M4(-9.294e-02, -2.481e-02, 1.403e-01, -1.359e-01, -1.061e-01, -5.883e-02, -1.400e-01, 5.274e-02, -2.305e-02, 4.316e-02, -1.507e-01, 1.499e-02, 1.564e-02, -4.029e-01, 1.641e-01, -5.780e-02));
	r += mul(s2_1, M4(-1.752e-02, -4.913e-02, 1.778e-02, -1.957e-03, -1.179e-01, 8.784e-02, -1.297e-01, -1.210e-02, -4.552e-02, 7.796e-02, -1.006e-02, -8.141e-02, -2.534e-01, -5.645e-02, 5.620e-02, 7.241e-02));
	r += mul(s2_2, M4(-2.166e-01, -5.779e-02, 1.009e-02, 1.082e-01, 3.005e-02, -2.124e-01, 7.502e-02, 1.306e-02, -2.851e-02, 6.912e-02, 3.177e-02, 1.945e-02, -1.518e-01, -1.881e-01, -1.107e-01, 1.201e-02));
	r += mul(s2_3, M4(-4.604e-02, 9.225e-02, 1.064e-01, 6.817e-03, -1.436e-01, 1.484e-01, 8.814e-03, 1.094e-01, -2.963e-02, -1.773e-01, 4.952e-02, 3.703e-02, -1.068e-01, -4.995e-02, -1.110e-01, -2.143e-01));
	r += mul(s2_4, M4(-1.260e-01, 4.946e-01, 9.564e-02, -8.133e-02, 1.674e-01, -2.238e-01, 7.130e-02, -1.971e-02, 1.666e-01, 2.136e-02, -1.943e-02, 2.817e-02, 6.673e-02, 5.442e-02, -9.210e-02, -2.027e-01));
	r += mul(s2_5, M4(1.928e-02, 3.097e-01, 1.487e-01, -2.217e-01, 1.282e-01, -2.386e-02, 6.971e-03, 8.529e-02, -9.620e-03, -5.562e-02, -1.293e-01, -7.603e-02, 2.167e-02, 1.700e-01, 3.485e-02, -5.729e-02));
	r += mul(s2_6, M4(-8.671e-02, 1.699e-01, 1.681e-01, 5.908e-02, -4.252e-03, -3.336e-02, 1.989e-01, 3.247e-02, 4.496e-02, -6.578e-02, -1.127e-01, -1.141e-01, -1.770e-01, -1.885e-01, 3.977e-02, 7.611e-02));
	r += mul(s2_7, M4(-1.079e-01, -8.983e-02, -1.298e-01, 1.607e-01, -1.065e-01, 1.661e-02, -7.341e-02, -4.099e-02, 2.815e-02, 1.428e-01, 7.976e-02, -2.694e-02, 2.546e-01, -4.262e-02, 6.669e-02, 4.667e-02));
	r += mul(s2_8, M4(-7.842e-02, -1.926e-01, 1.274e-02, -1.604e-02, 1.094e-02, 6.732e-02, 3.969e-02, 7.989e-03, -4.900e-02, -1.781e-01, -2.337e-02, -1.487e-02, 1.947e-01, -2.330e-01, 1.304e-01, -1.592e-02));
	// Inputs s3_*.
	r += mul(s3_0, M4(-1.820e-01, -7.021e-02, -8.828e-02, 7.994e-02, 9.838e-02, -7.559e-02, -6.619e-02, 6.800e-02, -9.744e-03, -6.037e-02, 1.779e-02, -4.527e-02, 3.634e-02, 3.175e-01, 2.481e-03, -1.218e-01));
	r += mul(s3_1, M4(-3.103e-02, 3.963e-02, -7.307e-02, 4.523e-02, -1.794e-01, -3.067e-01, 1.376e-01, 1.912e-01, 9.885e-02, -5.196e-02, -8.585e-02, 9.436e-02, -1.400e-01, 8.002e-02, -5.219e-03, 5.146e-02));
	r += mul(s3_2, M4(1.009e-01, 1.692e-01, 1.049e-01, -9.813e-03, 8.228e-02, 7.716e-02, 3.513e-01, -1.930e-01, -8.766e-02, -7.978e-02, -2.276e-01, -7.862e-02, -1.635e-02, 1.706e-01, -1.281e-01, 5.733e-02));
	r += mul(s3_3, M4(-1.142e-01, -1.328e-01, -6.850e-02, -7.040e-02, -1.122e-01, 1.799e-01, 2.511e-01, 9.606e-02, -9.700e-02, -2.469e-01, 2.237e-01, -2.198e-01, 3.658e-02, -6.923e-02, 1.219e-01, 7.830e-02));
	r += mul(s3_4, M4(1.073e-01, -1.174e-01, 1.235e-02, 1.604e-01, 3.105e-01, -1.302e-02, 3.550e-01, -1.716e-01, 1.775e-01, 2.759e-02, 1.459e-02, 1.077e-01, 3.152e-03, -1.262e-01, -3.016e-02, -1.051e-01));
	r += mul(s3_5, M4(-4.056e-02, 3.065e-02, 9.051e-02, 1.969e-01, 1.910e-01, 2.110e-01, 5.586e-02, 7.756e-02, -1.644e-01, -1.751e-01, -6.551e-02, 4.693e-02, -8.392e-02, 1.207e-01, -9.491e-02, 9.841e-02));
	r += mul(s3_6, M4(4.175e-02, -1.885e-02, -3.087e-02, -3.164e-01, 7.055e-04, 1.036e-01, -1.606e-01, -6.494e-02, 2.920e-01, -4.953e-01, 1.364e-01, 2.848e-02, 2.253e-02, 7.942e-02, 1.192e-01, -3.001e-02));
	r += mul(s3_7, M4(3.773e-03, -4.945e-02, 5.913e-02, -1.400e-01, 1.157e-01, 9.907e-02, -5.604e-02, 8.307e-02, 1.586e-01, 9.916e-02, -1.486e-02, 3.248e-01, 1.851e-01, -7.099e-02, 8.477e-02, 4.372e-02));
	r += mul(s3_8, M4(-2.045e-02, -7.855e-02, -4.495e-02, 6.078e-02, -6.315e-02, -7.862e-02, -1.406e-01, 3.674e-02, -1.628e-01, 1.351e-01, -6.527e-02, -6.756e-03, 6.466e-02, 1.532e-02, 4.055e-02, -3.032e-02));
	// Inputs s4_*.
	r += mul(s4_0, M4(-1.499e-01, -7.250e-02, -6.480e-02, 1.274e-01, 2.127e-01, -3.638e-02, -1.541e-01, 1.998e-02, -1.626e-01, -1.542e-02, -2.297e-01, -4.711e-02, 1.672e-01, 1.783e-01, 1.519e-01, -2.619e-02));
	r += mul(s4_1, M4(-1.092e-01, -8.656e-02, 1.802e-01, -7.827e-02, -4.769e-02, -3.521e-01, -6.076e-02, -1.028e-01, 9.007e-02, 7.495e-02, 1.750e-01, -1.573e-02, -9.365e-02, -1.703e-01, -2.072e-01, -3.748e-02));
	r += mul(s4_2, M4(4.518e-02, -2.296e-02, -1.432e-01, 5.951e-02, 2.120e-01, -1.424e-01, 1.259e-01, 1.291e-01, 1.397e-01, 2.877e-01, 2.697e-01, -2.689e-02, 7.480e-03, 1.533e-01, -1.073e-01, 6.312e-02));
	r += mul(s4_3, M4(-3.880e-02, -4.093e-02, 4.001e-02, -8.178e-02, -6.328e-02, -7.422e-02, 9.309e-02, 2.872e-02, -2.817e-01, 2.359e-01, -1.395e-01, -2.873e-01, -8.667e-02, 1.968e-01, 7.938e-02, -1.771e-02));
	r += mul(s4_4, M4(-2.421e-02, -8.792e-02, 4.204e-02, 3.885e-02, -8.507e-02, -1.308e-01, -1.262e-01, -2.356e-01, 1.499e-01, 1.452e-01, 1.380e-01, 2.543e-01, 1.627e-02, -1.195e-01, 5.032e-02, -1.295e-02));
	r += mul(s4_5, M4(1.467e-02, 1.872e-01, 5.851e-02, -1.954e-02, 1.338e-01, -5.445e-02, -9.792e-02, -1.148e-01, -9.106e-02, 7.280e-02, 1.391e-02, -1.308e-01, -3.100e-02, 9.612e-02, 5.221e-02, 1.605e-02));
	r += mul(s4_6, M4(5.990e-02, -4.208e-02, -1.734e-01, -1.367e-01, -7.889e-02, 1.409e-01, -5.239e-02, -5.982e-02, 1.043e-01, 3.526e-03, 2.075e-03, -2.397e-01, 8.369e-02, 3.376e-02, 7.339e-02, 2.281e-01));
	r += mul(s4_7, M4(2.764e-01, -1.796e-01, 1.455e-01, -3.286e-02, -4.900e-02, -1.909e-02, -2.249e-02, -9.996e-02, -3.512e-02, -1.206e-01, 1.847e-02, 2.303e-02, -6.943e-02, -1.329e-01, -7.892e-02, -7.028e-02));
	r += mul(s4_8, M4(4.555e-02, 1.136e-01, 1.246e-02, -1.455e-01, -4.663e-02, -2.871e-02, -2.046e-01, -1.446e-01, -2.253e-01, 2.763e-02, -1.004e-01, 2.980e-03, 7.329e-02, -1.165e-01, -2.569e-02, -1.452e-01));
	// Inputs s5_*.
	r += mul(s5_0, M4(1.021e-01, -4.993e-01, -3.943e-01, -3.816e-02, -1.130e-01, -9.294e-02, -1.374e-01, -2.294e-02, 1.072e-01, 4.437e-02, -4.967e-02, 2.527e-03, -5.801e-02, 5.197e-01, -6.761e-02, -7.898e-02));
	r += mul(s5_1, M4(1.401e-01, -1.985e-02, -2.018e-01, -2.551e-01, -3.057e-01, -8.481e-02, 7.933e-02, -2.035e-01, 4.128e-02, -2.439e-01, 1.204e-01, -5.914e-02, -1.325e-01, 1.267e-01, 6.798e-02, 1.577e-01));
	r += mul(s5_2, M4(-5.245e-02, -9.068e-02, 4.098e-03, -1.682e-03, -2.397e-01, 3.945e-02, -2.103e-02, 6.739e-02, 9.228e-02, -1.101e-01, 3.726e-01, 7.907e-02, -1.078e-01, -1.618e-01, -8.862e-02, -2.620e-02));
	r += mul(s5_3, M4(-4.351e-01, -1.867e-01, -1.876e-01, -1.895e-01, 7.828e-03, -1.009e-01, 8.182e-02, 2.871e-03, 1.487e-01, 5.502e-02, -2.857e-02, -1.281e-01, 8.090e-02, 2.686e-01, 8.667e-02, 5.965e-02));
	r += mul(s5_4, M4(-1.018e-01, -1.264e-01, -3.769e-02, 4.907e-02, -2.531e-01, -1.157e-02, -2.996e-04, -1.775e-01, 6.139e-02, 1.082e-01, 5.053e-02, 1.333e-01, -3.971e-02, 5.975e-02, 6.767e-02, -1.828e-01));
	r += mul(s5_5, M4(1.820e-01, 1.515e-01, 1.530e-02, 1.505e-01, -9.685e-02, 4.150e-02, 2.998e-02, -1.715e-01, -5.765e-02, -2.729e-02, -5.637e-03, 9.204e-02, -2.588e-02, -4.561e-01, -4.315e-02, -8.377e-02));
	r += mul(s5_6, M4(8.928e-02, 1.850e-01, -1.260e-01, 1.848e-01, -4.262e-02, -5.891e-02, -6.487e-02, 9.753e-02, 5.160e-02, -6.055e-02, 1.049e-01, 9.045e-02, -9.491e-02, 5.606e-02, -1.062e-01, 2.199e-01));
	r += mul(s5_7, M4(1.982e-02, 1.538e-01, -1.591e-01, 3.782e-03, -7.732e-02, 2.261e-01, -9.981e-02, 9.836e-02, 3.086e-02, -1.224e-01, -7.203e-02, -4.307e-02, 9.526e-02, 5.374e-02, -8.144e-02, 1.073e-01));
	r += mul(s5_8, M4(-3.075e-01, -2.543e-02, 1.095e-01, -9.644e-02, -5.159e-02, 8.254e-03, -9.783e-02, 7.402e-02, -2.176e-01, -1.525e-02, 1.419e-02, 5.681e-02, 7.628e-03, -1.650e-01, 1.085e-01, -7.338e-03));
	// Inputs s6_*.
	r += mul(s6_0, M4(1.741e-02, 2.125e-01, 1.168e-01, -4.260e-02, 1.296e-01, 1.149e-01, -1.442e-01, -8.121e-03, -2.016e-02, -1.870e-02, 5.396e-02, 1.021e-01, -1.165e-02, 1.188e-01, -9.040e-03, -3.638e-02));
	r += mul(s6_1, M4(2.115e-01, 3.941e-01, 2.957e-02, 1.120e-01, 7.739e-02, 3.555e-02, -3.015e-02, -9.409e-02, -1.270e-01, 2.272e-01, 7.447e-02, 9.663e-03, -9.259e-02, -1.570e-01, 1.401e-01, -2.905e-02));
	r += mul(s6_2, M4(5.055e-02, -2.221e-01, 9.541e-02, 3.872e-01, -1.639e-01, -2.329e-02, 4.069e-02, 1.590e-02, -8.454e-02, -2.204e-03, -6.812e-02, 1.247e-01, 2.795e-02, 8.716e-02, 2.249e-01, -1.203e-01));
	r += mul(s6_3, M4(9.096e-02, 2.607e-01, 1.479e-02, -8.888e-05, 1.222e-01, 8.601e-02, -5.109e-02, 7.726e-02, 1.441e-01, -2.092e-01, 1.506e-01, 1.928e-02, -6.446e-02, 2.402e-01, -2.574e-01, -2.931e-02));
	r += mul(s6_4, M4(-1.233e-01, 1.028e-01, -4.458e-02, 4.378e-01, -3.634e-02, 1.858e-01, 2.470e-02, -2.198e-01, -2.388e-01, 5.963e-02, 5.429e-01, -1.237e-01, -1.648e-01, 3.738e-02, -3.718e-01, 3.435e-02));
	r += mul(s6_5, M4(1.562e-01, -3.674e-01, 1.336e-01, -2.522e-01, -4.597e-02, 6.303e-03, 9.126e-02, -2.470e-02, -9.233e-02, -6.120e-02, 1.047e-01, 8.981e-02, 1.036e-01, 1.669e-01, 2.349e-02, -2.290e-02));
	r += mul(s6_6, M4(-6.684e-03, -2.623e-01, -1.988e-01, 7.140e-02, 3.641e-02, 1.881e-02, -5.311e-02, 1.847e-01, -5.423e-02, 1.317e-01, -8.327e-02, -6.475e-02, -2.640e-02, -1.547e-01, 2.943e-01, 1.121e-01));
	r += mul(s6_7, M4(2.161e-01, -2.097e-01, 8.820e-02, 2.329e-01, -9.084e-03, -6.403e-02, 8.263e-02, 1.153e-01, -1.475e-01, -2.645e-02, -1.344e-01, 1.451e-01, -1.776e-01, -2.007e-01, 2.800e-02, 7.088e-01));
	r += mul(s6_8, M4(1.772e-01, -2.968e-01, -8.142e-02, -2.700e-01, -3.186e-02, -3.387e-02, 7.155e-02, 8.385e-02, -8.074e-02, -6.685e-02, -1.783e-01, 2.027e-01, -5.418e-02, 7.905e-03, 1.838e-01, -1.703e-02));
	// Inputs s7_*.
	r += mul(s7_0, M4(-8.795e-03, 8.697e-02, -1.522e-02, -4.836e-02, 2.736e-01, -3.156e-02, 2.585e-02, -7.371e-02, -8.043e-02, -9.909e-02, 1.712e-01, 9.713e-03, 1.880e-01, 2.281e-02, -4.252e-02, 3.635e-02));
	r += mul(s7_1, M4(-2.456e-01, -3.123e-02, -8.438e-02, 5.096e-02, 2.406e-02, 5.820e-02, 1.871e-01, 9.246e-02, -4.854e-02, -1.220e-02, 8.671e-02, -8.788e-02, 9.197e-02, -1.051e-02, -1.603e-02, 7.826e-03));
	r += mul(s7_2, M4(-1.740e-01, -1.295e-01, -7.792e-02, 3.404e-02, -1.469e-01, -7.475e-02, 5.725e-02, -2.245e-01, -5.151e-02, -2.124e-02, -8.004e-02, 7.175e-02, 1.452e-02, 1.684e-01, -1.166e-01, -4.261e-02));
	r += mul(s7_3, M4(-2.162e-02, 6.368e-02, 4.331e-02, 3.686e-02, 4.560e-01, 3.422e-02, -7.137e-02, -1.088e-01, 6.296e-02, -1.548e-01, 2.900e-01, 2.713e-02, 2.923e-01, -1.117e-01, 9.811e-02, -5.212e-02));
	r += mul(s7_4, M4(-1.214e-01, -1.009e-01, -1.105e-01, -1.741e-01, -3.244e-01, 2.416e-02, -1.582e-01, -2.779e-01, 3.225e-02, -1.660e-01, 7.940e-02, 1.323e-01, 1.905e-01, -4.041e-02, 6.546e-03, -9.449e-02));
	r += mul(s7_5, M4(-8.208e-02, -3.476e-02, -9.438e-02, -1.004e-01, -2.276e-02, 4.244e-01, -2.006e-02, -1.780e-01, -6.147e-02, 3.855e-02, 2.678e-02, -7.125e-02, 1.039e-01, -2.048e-02, -1.289e-01, -2.667e-02));
	r += mul(s7_6, M4(3.593e-02, -2.194e-02, -7.869e-02, -1.356e-01, -4.294e-02, -6.255e-02, 1.837e-01, 3.174e-01, -4.049e-02, 1.013e-02, 9.296e-02, 1.090e-01, 7.926e-02, -7.720e-02, -1.803e-01, 5.508e-02));
	r += mul(s7_7, M4(-2.289e-02, -5.673e-02, 2.362e-02, 2.427e-01, 7.482e-03, -3.263e-01, 3.090e-01, 3.126e-01, -5.159e-02, 1.584e-01, -1.165e-01, 3.172e-01, 2.170e-01, -7.058e-02, 8.322e-02, -4.230e-02));
	r += mul(s7_8, M4(-4.434e-02, 8.279e-02, -1.851e-01, 7.341e-02, -5.335e-02, 1.912e-01, 3.554e-01, 4.142e-02, 4.895e-02, 3.337e-03, 1.815e-01, -5.542e-02, -2.550e-02, -7.432e-02, -2.534e-02, -3.253e-02));
	// Constant per-component offset, added once after all 72 products
	// (presumably the bias of these four output channels -- confirm with the generator).
	r += V4(-5.796e-02, -9.994e-02, -4.215e-02, -3.212e-02);
	return r;
}

V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) {
	V4 r = 0.0;
	r += mul(s0_0, M4(-7.087e-02, 6.239e-02, 5.798e-02, 8.463e-02, 2.003e-01, 1.054e-01, -5.041e-02, -9.348e-02, -1.609e-01, 7.109e-02, 1.378e-01, -1.261e-01, -1.773e-01, -1.247e-01, -1.533e-01, -3.260e-02));
	r += mul(s0_1, M4(-5.111e-02, -8.474e-02, -1.873e-01, -1.267e-01, -8.925e-03, 5.333e-02, -2.662e-01, -1.796e-01, -1.243e-02, 2.119e-02, -1.518e-02, -6.461e-02, 5.907e-02, 3.671e-04, -4.520e-02, 4.513e-02));
	r += mul(s0_2, M4(6.582e-03, -1.185e-02, -4.998e-02, -3.709e-02, -2.846e-02, 4.560e-02, -1.383e-02, -1.422e-01, -5.102e-02, -7.843e-02, -4.691e-02, -1.638e-01, 1.126e-01, -1.427e-01, 7.998e-02, 6.866e-02));
	r += mul(s0_3, M4(1.005e-01, 9.193e-02, 1.235e-02, -2.497e-02, 8.452e-02, 7.020e-02, 1.421e-01, 8.788e-02, 8.692e-02, -8.267e-02, -6.585e-05, 1.645e-01, -4.443e-02, -9.324e-02, -2.148e-01, 1.671e-01));
	r += mul(s0_4, M4(4.984e-02, 1.531e-01, -8.617e-02, -2.485e-02, -1.843e-01, 7.348e-02, 6.349e-02, 2.055e-01, 2.145e-02, -5.108e-02, -4.078e-02, 9.661e-02, 8.807e-02, -5.728e-02, 6.109e-03, 2.942e-01));
	r += mul(s0_5, M4(-9.202e-02, -9.519e-02, 1.117e-01, -4.526e-02, -2.775e-01, 7.643e-02, 1.403e-02, 2.340e-02, -1.074e-02, -1.436e-01, 7.396e-03, 1.238e-03, 1.729e-01, 2.339e-02, 1.535e-01, -8.622e-02));
	r += mul(s0_6, M4(1.005e-02, -3.189e-02, 9.263e-02, 3.803e-02, 1.789e-01, -1.022e-01, 6.633e-02, -7.069e-03, -5.458e-02, 3.946e-02, -1.137e-01, -7.933e-02, -8.455e-02, 9.216e-02, 5.908e-03, -6.718e-02));
	r += mul(s0_7, M4(1.734e-01, -4.896e-02, 1.712e-01, 9.361e-03, 3.362e-02, 9.108e-02, 4.730e-01, 2.667e-01, -5.602e-02, -1.280e-01, 1.817e-01, -7.983e-02, 5.922e-03, -1.085e-02, -7.856e-02, 4.042e-02));
	r += mul(s0_8, M4(4.479e-02, -5.251e-02, 3.659e-03, -6.087e-02, -2.521e-02, -9.217e-02, 1.009e-01, 5.147e-02, 7.763e-02, 9.779e-02, -1.913e-02, -1.032e-02, -5.252e-02, -2.632e-02, 6.120e-02, -4.011e-02));
	r += mul(s1_0, M4(1.749e-01, 4.373e-02, 1.850e-01, -1.322e-03, 7.954e-02, 1.727e-02, -1.852e-01, 4.827e-02, 1.903e-01, -1.351e-01, 9.082e-02, -7.283e-02, -1.823e-01, 5.595e-02, -2.808e-02, -9.443e-03));
	r += mul(s1_1, M4(-1.981e-02, 8.999e-02, -2.314e-01, -1.194e-01, 1.176e-01, 2.058e-02, -1.970e-02, -6.448e-02, -1.178e-02, 1.526e-01, 9.951e-02, 2.790e-02, 3.800e-02, 2.846e-01, 1.088e-01, -9.929e-02));
	r += mul(s1_2, M4(-2.137e-03, -9.093e-02, -8.969e-02, -7.525e-02, 2.343e-01, -8.973e-02, -9.970e-02, 7.148e-02, 1.542e-01, 4.977e-02, -1.556e-01, 6.833e-02, -2.306e-02, 9.185e-02, -1.417e-01, 3.659e-02));
	r += mul(s1_3, M4(-9.093e-04, 8.085e-02, 1.412e-01, -2.497e-01, -1.439e-01, 9.891e-02, 2.928e-02, 1.516e-01, 4.379e-03, -4.236e-02, 2.203e-02, -8.157e-02, 7.379e-02, -1.329e-01, 2.084e-01, -7.908e-02));
	r += mul(s1_4, M4(-9.692e-02, 2.172e-01, -5.032e-03, -4.059e-01, -1.811e-01, -1.188e-01, -6.957e-02, 2.046e-01, 2.704e-01, -2.443e-01, 2.411e-01, -4.141e-02, -1.404e-01, -1.501e-01, 3.869e-02, 1.738e-01));
	r += mul(s1_5, M4(1.970e-01, -4.060e-01, -2.504e-01, 2.264e-01, 2.595e-02, -1.535e-01, 1.830e-01, 6.851e-02, -6.238e-02, -3.365e-01, -2.264e-01, -7.431e-03, -4.215e-02, 1.390e-01, -4.475e-01, -1.452e-01));
	r += mul(s1_6, M4(-2.510e-01, 1.528e-02, 4.968e-02, -2.842e-01, -3.769e-02, 9.383e-02, -8.688e-02, -1.122e-01, -1.430e-01, 5.863e-02, -5.752e-02, 1.801e-01, 1.882e-01, 6.842e-02, 2.072e-01, -2.585e-02));
	r += mul(s1_7, M4(-3.887e-02, -5.653e-02, 4.207e-02, -8.963e-02, -5.419e-02, 2.145e-02, 1.625e-01, 1.640e-02, -4.333e-03, 1.425e-02, 1.677e-01, 3.464e-01, 1.892e-01, 1.811e-02, 2.237e-02, -1.581e-02));
	r += mul(s1_8, M4(1.410e-01, -2.081e-01, -4.614e-02, 6.339e-02, 5.790e-02, 1.528e-02, -1.789e-01, -3.247e-02, 2.894e-01, -1.078e-01, 6.089e-02, 6.678e-02, 8.685e-02, 1.617e-02, -9.748e-02, -1.817e-02));
	r += mul(s2_0, M4(-2.281e-01, 1.145e-01, 1.690e-01, 3.860e-03, -2.416e-02, 3.134e-02, 3.746e-02, 9.753e-02, -1.249e-02, -2.384e-02, -6.270e-02, -1.023e-01, 9.561e-02, 1.324e-01, 2.370e-02, -1.242e-01));
	r += mul(s2_1, M4(2.798e-02, 2.944e-02, -1.391e-01, -2.065e-02, 1.531e-02, 8.454e-03, 5.738e-02, -8.349e-02, 1.830e-02, -4.295e-02, -1.300e-01, 3.290e-02, -7.138e-02, -6.105e-02, -1.993e-01, 1.054e-01));
	r += mul(s2_2, M4(1.498e-01, -2.510e-02, 6.616e-02, -8.944e-02, -3.445e-02, 1.348e-01, -5.942e-02, 9.708e-02, 1.031e-01, 2.463e-02, 5.985e-03, -7.200e-02, 3.105e-01, -1.207e-01, 3.550e-02, -1.467e-01));
	r += mul(s2_3, M4(2.385e-02, 1.033e-01, -4.184e-02, 1.449e-01, -1.506e-01, -1.277e-01, 2.225e-01, -1.859e-01, -1.908e-01, -1.564e-01, 7.764e-02, -1.052e-01, -1.490e-02, 1.196e-01, 3.107e-01, -2.853e-02));
	r += mul(s2_4, M4(-8.772e-02, -4.082e-02, -2.519e-01, 2.430e-01, 3.050e-01, -1.207e-01, 2.054e-02, -1.949e-01, 8.330e-02, 1.593e-01, -1.335e-01, -9.296e-02, -6.047e-01, 9.570e-02, -1.153e-01, 3.585e-01));
	r += mul(s2_5, M4(-1.555e-01, -9.061e-02, -1.124e-01, 1.400e-01, 5.122e-03, -4.839e-02, -4.809e-02, 5.546e-02, 1.789e-01, 4.024e-02, -8.952e-02, 4.942e-02, -1.059e-01, -8.778e-02, -2.437e-01, 5.702e-02));
	r += mul(s2_6, M4(9.926e-02, -9.385e-03, 1.125e-01, 9.954e-03, -7.612e-02, 8.703e-02, 5.513e-02, 2.757e-03, -7.221e-02, 4.016e-02, -5.296e-03, -1.003e-01, 5.334e-01, -3.056e-02, -3.064e-01, 7.117e-03));
	r += mul(s2_7, M4(-2.532e-01, -4.781e-02, 9.780e-02, -8.244e-02, 2.035e-02, 4.577e-03, -1.115e-01, -6.034e-02, -2.605e-01, -8.794e-02, 3.998e-01, 5.975e-02, -1.221e-01, -4.130e-03, -1.800e-01, -1.644e-01));
	r += mul(s2_8, M4(-5.837e-02, -1.843e-01, -2.463e-02, -2.233e-01, -1.517e-01, -5.376e-02, -1.700e-01, 5.567e-02, 6.757e-02, -1.117e-01, 4.710e-04, 1.177e-01, -1.470e-01, -1.264e-01, -3.143e-01, -1.695e-01));
	r += mul(s3_0, M4(1.737e-02, 1.084e-01, 1.796e-01, 3.595e-02, -1.737e-01, 8.785e-04, -2.436e-01, -4.031e-02, -1.042e-01, 4.849e-02, 1.806e-02, -9.414e-02, -2.708e-01, 9.794e-02, -1.460e-01, 3.161e-02));
	r += mul(s3_1, M4(1.500e-01, -1.101e-01, -2.117e-01, 1.373e-01, 2.621e-01, -8.611e-02, -1.486e-02, 4.369e-02, -3.867e-02, -6.837e-02, -4.205e-01, 2.570e-03, 4.208e-02, 1.402e-01, 2.627e-01, 7.434e-02));
	r += mul(s3_2, M4(5.101e-02, 4.293e-02, 1.221e-01, 2.186e-02, -1.894e-02, -1.332e-01, -2.449e-01, 3.077e-02, 1.728e-01, 8.873e-02, -1.142e-01, 3.216e-02, 9.510e-02, -2.458e-02, 9.526e-02, -4.923e-02));
	r += mul(s3_3, M4(-7.877e-02, -2.980e-02, 1.517e-01, -4.755e-02, 3.299e-02, -1.722e-02, 6.496e-04, 2.745e-01, -3.029e-01, 2.031e-02, 4.097e-02, 4.456e-02, 1.630e-01, -6.488e-02, 2.990e-01, -5.979e-02));
	r += mul(s3_4, M4(2.165e-01, -1.397e-01, -8.698e-02, 6.277e-02, -1.256e-01, 2.055e-01, 1.409e-01, 2.895e-01, 5.554e-01, 2.140e-01, -1.626e-01, 8.477e-02, -4.270e-01, 1.513e-01, -1.548e-01, -1.152e-01));
	r += mul(s3_5, M4(-6.026e-02, -1.783e-02, 1.314e-01, -6.727e-02, -2.205e-01, -1.427e-01, 2.454e-01, 3.660e-02, -8.011e-02, -6.024e-02, 2.695e-01, 2.440e-01, 2.838e-02, -5.347e-02, -4.060e-02, 8.471e-03));
	r += mul(s3_6, M4(-2.508e-01, 2.804e-01, 2.255e-02, -2.307e-01, 1.237e-02, 2.406e-02, 2.628e-01, 8.921e-02, -2.000e-01, 7.200e-02, 3.133e-01, -4.548e-01, 8.203e-02, -1.264e-01, -3.128e-03, 1.769e-01));
	r += mul(s3_7, M4(7.194e-02, 2.412e-02, -7.567e-02, 8.308e-02, -2.699e-01, 9.371e-02, 1.981e-01, 9.185e-02, 4.334e-02, -4.782e-02, -3.260e-01, -1.932e-01, 2.238e-01, 1.088e-01, -1.397e-02, 9.171e-02));
	r += mul(s3_8, M4(-5.989e-02, 2.174e-02, 1.074e-01, 7.843e-02, 2.823e-01, 2.629e-02, -2.285e-02, 1.446e-01, 1.295e-02, 1.623e-01, -4.525e-02, 1.443e-01, -1.739e-01, 1.968e-02, 1.531e-01, -1.264e-03));
	r += mul(s4_0, M4(3.647e-02, -1.538e-01, -2.943e-02, 1.765e-02, 3.218e-03, 8.694e-02, -1.315e-01, -6.913e-03, 3.649e-01, 1.162e-01, -1.007e-02, -7.707e-02, 5.933e-02, -7.162e-02, -1.167e-01, -4.503e-02));
	r += mul(s4_1, M4(-2.816e-02, -1.272e-03, 1.170e-01, -2.845e-02, 3.693e-01, -1.619e-01, -6.193e-02, -1.078e-01, -2.879e-01, -8.741e-02, 1.235e-02, 3.369e-01, -2.247e-01, 1.600e-01, 2.450e-01, 1.223e-01));
	r += mul(s4_2, M4(1.452e-01, -7.362e-02, -1.782e-01, 8.628e-03, -1.430e-01, 1.576e-01, -1.538e-01, 9.937e-02, -1.037e-03, 8.466e-02, -7.769e-03, 1.498e-01, -7.806e-02, -1.294e-01, -2.572e-02, 1.327e-01));
	r += mul(s4_3, M4(-5.415e-02, -1.741e-01, 5.747e-02, -2.019e-02, 1.861e-01, -6.486e-02, 1.126e-01, 2.221e-01, 1.252e-01, -3.269e-02, -1.470e-01, -1.061e-01, -3.489e-02, -4.614e-02, -4.317e-02, 2.373e-02));
	r += mul(s4_4, M4(-1.777e-01, 2.710e-02, -3.992e-02, -1.750e-01, -1.067e-01, -1.050e-01, -3.173e-01, 1.977e-01, 1.913e-01, 1.056e-01, 6.696e-02, -3.033e-01, 9.398e-02, -6.405e-03, -9.652e-03, 3.989e-02));
	r += mul(s4_5, M4(3.614e-02, -2.415e-02, -7.483e-02, 1.032e-01, -2.156e-01, 3.760e-01, -2.740e-01, -9.042e-02, -2.041e-01, 1.055e-01, 2.293e-01, -8.301e-02, -1.113e-01, 8.617e-03, -2.125e-02, -9.272e-02));
	r += mul(s4_6, M4(9.263e-02, -6.307e-02, 5.636e-02, -2.100e-02, -1.294e-01, -7.647e-03, -4.177e-02, -3.614e-02, -5.024e-01, -9.556e-02, -1.460e-01, -1.035e-01, -5.274e-02, -3.602e-02, 7.783e-02, 7.514e-02));
	r += mul(s4_7, M4(1.637e-01, 1.444e-01, 4.168e-02, -1.246e-01, 8.515e-02, -1.587e-01, -2.420e-01, -2.454e-01, 1.013e-01, 2.599e-01, 3.772e-01, 6.808e-02, -6.795e-02, 1.837e-01, 6.424e-02, -2.261e-02));
	r += mul(s4_8, M4(-9.780e-02, -2.525e-02, 1.251e-01, 1.286e-01, 2.938e-01, -4.910e-02, -6.868e-02, -4.813e-02, 1.199e-01, 1.295e-01, -3.079e-01, 8.940e-02, -3.757e-03, -1.445e-01, 9.543e-02, 1.553e-01));
	r += mul(s5_0, M4(1.186e-01, -2.205e-02, -3.427e-01, -3.424e-01, 1.843e-02, 6.114e-02, -1.087e-01, -7.415e-03, 3.993e-02, 2.651e-02, 1.467e-01, 6.105e-04, -1.080e-01, 1.258e-01, -1.729e-01, -7.216e-03));
	r += mul(s5_1, M4(-2.679e-01, 5.804e-02, -1.816e-01, 2.207e-01, -2.009e-01, -1.762e-02, 3.188e-02, -1.822e-01, 5.268e-02, -6.641e-02, 2.279e-01, 7.508e-02, 3.310e-02, 8.985e-02, -1.834e-01, -3.886e-02));
	r += mul(s5_2, M4(1.843e-02, 1.547e-01, -3.799e-01, 5.699e-02, 3.773e-01, 1.677e-01, -9.834e-02, 7.512e-02, -2.524e-01, 1.043e-01, -3.282e-02, 4.121e-02, 1.174e-01, 3.207e-03, -1.808e-01, -3.663e-02));
	r += mul(s5_3, M4(-5.561e-02, 4.176e-01, -2.231e-01, 1.208e-01, -2.510e-01, -8.773e-02, 9.459e-02, -5.313e-02, 1.463e-01, -1.630e-01, -4.437e-02, -8.088e-02, -4.934e-01, -6.573e-02, 8.557e-02, -4.096e-02));
	r += mul(s5_4, M4(8.186e-02, -1.844e-01, 1.705e-01, 7.483e-02, -2.746e-01, -6.372e-02, 6.459e-02, -1.048e-01, 2.139e-01, -1.889e-02, -2.934e-01, -2.557e-01, 2.013e-01, -1.773e-01, 1.988e-01, 1.563e-01));
	r += mul(s5_5, M4(-4.151e-03, 3.976e-02, 8.492e-02, -6.545e-02, 2.319e-01, 1.423e-01, -1.815e-02, 3.074e-02, -5.575e-02, 1.540e-02, -6.605e-02, -1.032e-03, 1.375e-01, 4.536e-02, 1.015e-01, -1.465e-01));
	r += mul(s5_6, M4(3.149e-02, -1.012e-01, -5.762e-01, 3.408e-01, 4.326e-02, 5.614e-03, 1.175e-01, 1.259e-02, 4.803e-02, -1.723e-01, -1.382e-01, 1.344e-01, 4.600e-01, -1.617e-01, -2.588e-01, 1.638e-01));
	r += mul(s5_7, M4(2.048e-01, 4.332e-02, 1.848e-01, -3.604e-01, -2.261e-01, -1.508e-01, -6.950e-03, 9.758e-02, -6.281e-02, -8.408e-02, 4.513e-02, -9.105e-02, -3.698e-01, 4.274e-02, -1.901e-01, -5.405e-03));
	r += mul(s5_8, M4(-1.685e-01, 4.163e-02, 3.980e-02, -1.844e-01, -4.091e-02, 7.826e-02, -1.567e-02, 1.459e-01, -2.282e-02, 7.285e-02, -3.953e-01, 1.006e-01, 7.857e-03, -3.035e-02, -1.264e-01, -4.881e-02));
	r += mul(s6_0, M4(4.902e-02, -1.445e-01, 2.465e-01, 3.400e-02, 2.473e-02, -1.400e-01, 1.087e-01, -7.253e-02, -1.108e-01, 1.126e-01, -3.531e-02, 1.471e-02, 5.746e-02, 2.257e-02, 1.677e-01, -1.217e-01));
	r += mul(s6_1, M4(-1.741e-01, 2.362e-01, 1.354e-01, 1.540e-01, -7.186e-02, -1.081e-01, 3.640e-02, -7.877e-02, -3.734e-02, 2.443e-02, -3.647e-01, -1.431e-01, -1.476e-01, 5.666e-02, 2.071e-01, 1.688e-01));
	r += mul(s6_2, M4(4.008e-01, 7.678e-02, 3.198e-01, -4.019e-02, 7.331e-02, 1.380e-01, -4.423e-02, -6.537e-02, 9.380e-02, -1.094e-02, 6.226e-02, 1.114e-01, -1.281e-01, -6.551e-02, 1.202e-01, -1.214e-01));
	r += mul(s6_3, M4(-2.556e-02, 1.131e-02, -2.070e-01, 1.448e-01, -2.169e-02, -6.685e-02, 1.419e-01, 2.091e-03, -1.227e-01, 3.769e-02, 5.021e-02, 5.427e-03, 1.610e-01, -9.560e-02, 5.762e-02, 3.525e-01));
	r += mul(s6_4, M4(4.889e-02, 1.146e-01, 4.403e-01, 1.301e-01, -7.739e-02, 2.461e-02, 3.249e-03, -4.671e-02, 7.585e-02, -2.600e-01, 1.391e-01, -1.733e-01, -2.416e-01, -2.492e-01, -3.474e-01, 3.281e-01));
	r += mul(s6_5, M4(1.752e-01, 1.489e-01, 4.170e-01, 2.319e-02, -6.171e-02, 2.129e-01, -5.746e-02, -2.849e-02, 1.139e-01, 1.991e-01, -2.380e-01, 1.588e-01, -1.587e-01, -1.974e-01, 1.952e-01, 1.432e-01));
	r += mul(s6_6, M4(-1.657e-02, 8.776e-02, 2.591e-02, -2.024e-01, 1.049e-01, -1.078e-01, 3.503e-02, -4.971e-02, -1.480e-01, -9.008e-02, -2.529e-01, 2.549e-01, 5.592e-01, -5.244e-02, 8.626e-02, 3.597e-02));
	r += mul(s6_7, M4(-3.532e-02, 8.905e-02, 1.791e-01, -9.458e-02, 6.780e-02, -5.794e-02, -1.292e-01, 1.035e-02, -2.125e-01, 1.398e-01, 1.009e-01, 2.977e-02, 2.564e-01, -2.495e-01, -6.772e-01, -4.445e-01));
	r += mul(s6_8, M4(-9.304e-02, 2.128e-01, 3.858e-01, 2.376e-02, 1.338e-01, 4.078e-02, -4.558e-02, 2.654e-02, 7.575e-02, -1.313e-01, 3.156e-02, -5.099e-02, -1.872e-01, 6.766e-02, -8.702e-02, -1.713e-01));
	r += mul(s7_0, M4(-5.595e-02, 6.183e-02, 2.464e-01, -1.414e-01, 7.124e-02, -6.475e-02, 2.529e-01, -1.904e-01, 3.912e-02, -9.186e-02, -1.179e-01, -1.466e-01, 9.174e-03, 1.061e-02, 5.436e-02, 9.571e-02));
	r += mul(s7_1, M4(-2.018e-01, -8.089e-03, -7.846e-02, -9.330e-02, -1.790e-02, 3.018e-02, 1.472e-01, -1.160e-01, -1.650e-01, -8.923e-02, -1.474e-01, -7.902e-02, 4.715e-02, 9.154e-02, 3.535e-02, 2.654e-02));
	r += mul(s7_2, M4(-1.588e-01, -8.289e-02, 2.280e-03, -7.578e-02, -1.446e-01, 3.556e-01, -1.940e-01, -2.261e-01, -4.402e-02, 7.329e-03, 2.272e-02, -4.111e-02, 4.818e-02, -6.474e-02, -8.609e-02, 3.821e-03));
	r += mul(s7_3, M4(1.366e-01, 2.409e-03, -5.919e-02, 5.778e-02, -2.203e-01, -1.450e-01, 5.869e-02, -1.753e-01, 9.278e-02, -3.186e-01, 1.456e-01, 2.001e-02, 1.284e-03, -3.633e-02, 6.268e-02, 3.140e-02));
	r += mul(s7_4, M4(-2.500e-01, 4.451e-02, -2.156e-01, -1.797e-01, -6.846e-02, -4.157e-01, 4.179e-01, -5.199e-02, -8.430e-02, -2.177e-01, 1.486e-01, 2.498e-01, -4.685e-02, -8.815e-02, 1.002e-01, 1.469e-01));
	r += mul(s7_5, M4(5.817e-02, 1.482e-01, -1.004e-01, 7.580e-02, 2.545e-01, 3.320e-01, -3.625e-01, -1.010e-01, -1.058e-01, -1.438e-02, 4.576e-02, 3.802e-02, 3.077e-01, 1.275e-01, -3.467e-01, 1.315e-01));
	r += mul(s7_6, M4(7.622e-02, -9.737e-03, -5.742e-02, -8.808e-02, 2.788e-01, -8.131e-02, 2.101e-02, 2.700e-02, 2.274e-01, -4.161e-02, 2.973e-01, -2.978e-02, -1.343e-01, 5.086e-04, 2.287e-02, 5.469e-02));
	r += mul(s7_7, M4(8.660e-02, -1.785e-01, -1.782e-01, 1.151e-01, -9.595e-02, -1.576e-01, -5.201e-02, 3.305e-01, -7.192e-02, 2.183e-01, 1.806e-02, -1.954e-01, -1.990e-01, 3.558e-01, 1.260e-01, 2.973e-01));
	r += mul(s7_8, M4(-1.830e-01, -1.268e-01, 2.358e-02, -1.262e-02, -1.297e-02, 2.289e-01, -4.859e-02, 1.057e-01, 5.627e-02, -1.755e-02, 8.560e-02, -1.372e-01, 1.458e-01, 2.299e-02, -2.603e-01, 4.789e-02));
	r += V4(-2.183e-02, 2.789e-02, -4.360e-02, 1.810e-03);
	return r;
}

// Pass 3 entry point: handles a single pixel coordinate per invocation.
//
// blockStart - added to the per-thread offset to form the pixel coordinate.
// tid        - thread id; only tid.x is used, fed through Rmp8x8 (defined
//              outside this chunk; presumably maps the flat index onto an
//              8x8 tile - confirm).
//
// For each of the four sampling macros l0..l3 a 3x3 neighbourhood is read,
// every sample is split into a non-negative and a non-positive half, and the
// resulting 72 vectors are passed to f0..f3, whose results are stored in
// t0..t3 at the pixel coordinate.
void Pass3(uint2 blockStart, uint3 tid) {
	// NOTE(review): GetInputPt() presumably returns the size of one input
	// texel in normalized coordinates - confirm against the Magpie runtime.
	float2 pt = float2(GetInputPt());
	uint2 gxy = Rmp8x8(tid.x) + blockStart;
	uint2 size = GetInputSize();
	// Skip invocations whose coordinate falls outside the input size.
	if (gxy.x >= size.x || gxy.y >= size.y) {
		return;
	}
	// Sampling position of this pixel (the +0.5 offsets to the pixel centre
	// if pt is the texel size). The O() helper reads `pos` and `pt` by name,
	// and l0..l3 are expected to expand to it, so these locals must keep
	// their names.
	float2 pos = (gxy + 0.5) * pt;

	// Suffix _0.._8 enumerates the 3x3 offsets row by row:
	// (-1,-1) (0,-1) (1,-1) / (-1,0) (0,0) (1,0) / (-1,1) (0,1) (1,1).
	V4 s0_0 = l0(-1.0, -1.0);
	V4 s0_1 = l0(0.0, -1.0);
	V4 s0_2 = l0(1.0, -1.0);
	V4 s0_3 = l0(-1.0, 0.0);
	V4 s0_4 = l0(0.0, 0.0);
	V4 s0_5 = l0(1.0, 0.0);
	V4 s0_6 = l0(-1.0, 1.0);
	V4 s0_7 = l0(0.0, 1.0);
	V4 s0_8 = l0(1.0, 1.0);
	// s1_k keeps the non-positive half of s0_k. It must be taken before
	// s0_k is clamped in place below.
	V4 s1_0 = -max(-s0_0, 0.0);
	V4 s1_1 = -max(-s0_1, 0.0);
	V4 s1_2 = -max(-s0_2, 0.0);
	V4 s1_3 = -max(-s0_3, 0.0);
	V4 s1_4 = -max(-s0_4, 0.0);
	V4 s1_5 = -max(-s0_5, 0.0);
	V4 s1_6 = -max(-s0_6, 0.0);
	V4 s1_7 = -max(-s0_7, 0.0);
	V4 s1_8 = -max(-s0_8, 0.0);
	// s0_k now keeps only its non-negative half.
	s0_0 = max(s0_0, 0.0);
	s0_1 = max(s0_1, 0.0);
	s0_2 = max(s0_2, 0.0);
	s0_3 = max(s0_3, 0.0);
	s0_4 = max(s0_4, 0.0);
	s0_5 = max(s0_5, 0.0);
	s0_6 = max(s0_6, 0.0);
	s0_7 = max(s0_7, 0.0);
	s0_8 = max(s0_8, 0.0);

	// Same sample / split sequence for l1: s2_k non-negative, s3_k non-positive.
	V4 s2_0 = l1(-1.0, -1.0);
	V4 s2_1 = l1(0.0, -1.0);
	V4 s2_2 = l1(1.0, -1.0);
	V4 s2_3 = l1(-1.0, 0.0);
	V4 s2_4 = l1(0.0, 0.0);
	V4 s2_5 = l1(1.0, 0.0);
	V4 s2_6 = l1(-1.0, 1.0);
	V4 s2_7 = l1(0.0, 1.0);
	V4 s2_8 = l1(1.0, 1.0);
	V4 s3_0 = -max(-s2_0, 0.0);
	V4 s3_1 = -max(-s2_1, 0.0);
	V4 s3_2 = -max(-s2_2, 0.0);
	V4 s3_3 = -max(-s2_3, 0.0);
	V4 s3_4 = -max(-s2_4, 0.0);
	V4 s3_5 = -max(-s2_5, 0.0);
	V4 s3_6 = -max(-s2_6, 0.0);
	V4 s3_7 = -max(-s2_7, 0.0);
	V4 s3_8 = -max(-s2_8, 0.0);
	s2_0 = max(s2_0, 0.0);
	s2_1 = max(s2_1, 0.0);
	s2_2 = max(s2_2, 0.0);
	s2_3 = max(s2_3, 0.0);
	s2_4 = max(s2_4, 0.0);
	s2_5 = max(s2_5, 0.0);
	s2_6 = max(s2_6, 0.0);
	s2_7 = max(s2_7, 0.0);
	s2_8 = max(s2_8, 0.0);

	// Same sample / split sequence for l2: s4_k non-negative, s5_k non-positive.
	V4 s4_0 = l2(-1.0, -1.0);
	V4 s4_1 = l2(0.0, -1.0);
	V4 s4_2 = l2(1.0, -1.0);
	V4 s4_3 = l2(-1.0, 0.0);
	V4 s4_4 = l2(0.0, 0.0);
	V4 s4_5 = l2(1.0, 0.0);
	V4 s4_6 = l2(-1.0, 1.0);
	V4 s4_7 = l2(0.0, 1.0);
	V4 s4_8 = l2(1.0, 1.0);
	V4 s5_0 = -max(-s4_0, 0.0);
	V4 s5_1 = -max(-s4_1, 0.0);
	V4 s5_2 = -max(-s4_2, 0.0);
	V4 s5_3 = -max(-s4_3, 0.0);
	V4 s5_4 = -max(-s4_4, 0.0);
	V4 s5_5 = -max(-s4_5, 0.0);
	V4 s5_6 = -max(-s4_6, 0.0);
	V4 s5_7 = -max(-s4_7, 0.0);
	V4 s5_8 = -max(-s4_8, 0.0);
	s4_0 = max(s4_0, 0.0);
	s4_1 = max(s4_1, 0.0);
	s4_2 = max(s4_2, 0.0);
	s4_3 = max(s4_3, 0.0);
	s4_4 = max(s4_4, 0.0);
	s4_5 = max(s4_5, 0.0);
	s4_6 = max(s4_6, 0.0);
	s4_7 = max(s4_7, 0.0);
	s4_8 = max(s4_8, 0.0);

	// Same sample / split sequence for l3: s6_k non-negative, s7_k non-positive.
	V4 s6_0 = l3(-1.0, -1.0);
	V4 s6_1 = l3(0.0, -1.0);
	V4 s6_2 = l3(1.0, -1.0);
	V4 s6_3 = l3(-1.0, 0.0);
	V4 s6_4 = l3(0.0, 0.0);
	V4 s6_5 = l3(1.0, 0.0);
	V4 s6_6 = l3(-1.0, 1.0);
	V4 s6_7 = l3(0.0, 1.0);
	V4 s6_8 = l3(1.0, 1.0);
	V4 s7_0 = -max(-s6_0, 0.0);
	V4 s7_1 = -max(-s6_1, 0.0);
	V4 s7_2 = -max(-s6_2, 0.0);
	V4 s7_3 = -max(-s6_3, 0.0);
	V4 s7_4 = -max(-s6_4, 0.0);
	V4 s7_5 = -max(-s6_5, 0.0);
	V4 s7_6 = -max(-s6_6, 0.0);
	V4 s7_7 = -max(-s6_7, 0.0);
	V4 s7_8 = -max(-s6_8, 0.0);
	s6_0 = max(s6_0, 0.0);
	s6_1 = max(s6_1, 0.0);
	s6_2 = max(s6_2, 0.0);
	s6_3 = max(s6_3, 0.0);
	s6_4 = max(s6_4, 0.0);
	s6_5 = max(s6_5, 0.0);
	s6_6 = max(s6_6, 0.0);
	s6_7 = max(s6_7, 0.0);
	s6_8 = max(s6_8, 0.0);

	// Each fN receives the same 72 vectors, in the same positional order,
	// and produces the four channels stored in tN.
	t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8);
	t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8);
	t2[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8);
	t3[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8);
}

//!PASS 4
//!DESC conv3
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN t0, t1, t2, t3
//!OUT t4, t5, t6, t7

// Pass 4 sampling helpers: lN(x, y) reads texture tN through O() at an
// offset of (x, y) * pt from `pos` and converts the result to V4. O() uses
// the point sampler SP and picks up `pos` and `pt` from the scope where the
// macro is expanded, so both must be declared there.
#define l0(x, y) V4(O(t0, float2(x, y)))
#define l1(x, y) V4(O(t1, float2(x, y)))
#define l2(x, y) V4(O(t2, float2(x, y)))
#define l3(x, y) V4(O(t3, float2(x, y)))

// Pass 4, output group 0: combines 72 four-component inputs into one
// four-component result.
//
// Parameters: sG_K for G = 0..7 and K = 0..8. NOTE(review): the caller of
// this function is outside the visible chunk; presumably G selects a
// non-negative / non-positive half of a sampled texture and K a 3x3 tap,
// as in the preceding pass - confirm.
//
// Returns: the sum over all inputs of mul(sG_K, W_G_K) plus a constant
// bias vector, where every W_G_K is the constant 4x4 matrix written inline.
//
// The accumulation runs in a fixed order at V4 (min16float4) precision;
// reordering the terms may change rounding, so keep the statement order.
V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) {
	V4 r = 0.0;
	// One multiply-accumulate per input, in parameter order.
	r += mul(s0_0, M4(5.898e-02, 1.023e-01, 5.179e-02, 3.510e-02, -4.142e-01, -5.004e-01, -2.296e-01, 1.074e-02, 6.569e-02, -1.632e-02, -4.021e-02, 2.674e-02, -1.201e-01, 2.210e-03, 1.166e-01, -2.971e-02));
	r += mul(s0_1, M4(-1.003e-01, 1.235e-01, 6.209e-02, -1.355e-01, -2.316e-01, -1.078e-01, 2.680e-01, -4.341e-01, -3.042e-02, 1.843e-01, 1.914e-02, -3.570e-02, -5.369e-02, -1.635e-02, 6.805e-02, 3.052e-02));
	r += mul(s0_2, M4(-1.906e-02, 6.295e-02, 5.892e-02, -1.250e-01, 2.057e-01, 1.112e-01, 2.352e-01, -2.113e-01, 1.208e-01, -6.867e-03, 4.805e-02, 7.473e-02, -6.804e-03, -1.886e-02, -1.671e-02, 2.571e-02));
	r += mul(s0_3, M4(-1.429e-01, 1.016e-01, 1.189e-01, -1.123e-01, -5.401e-01, -1.508e-01, 1.473e-01, 2.299e-01, -2.651e-01, -2.545e-01, -9.793e-02, 1.652e-01, -1.913e-01, 2.026e-01, 3.793e-02, -1.470e-01));
	r += mul(s0_4, M4(8.107e-02, 1.051e-01, 2.518e-01, 8.745e-02, 6.514e-01, -3.956e-01, 1.570e-01, -4.313e-01, 1.011e-01, -2.761e-01, 1.706e-01, -7.386e-02, 8.301e-02, 1.543e-01, -7.868e-03, -2.640e-01));
	r += mul(s0_5, M4(4.034e-02, 1.852e-01, -8.966e-02, -7.864e-02, 5.985e-02, -3.990e-01, 4.423e-01, 3.717e-01, 2.115e-01, 7.712e-02, 1.040e-01, 7.704e-02, 5.719e-02, 3.220e-02, -4.211e-02, -3.406e-02));
	r += mul(s0_6, M4(-5.175e-02, -7.418e-02, 5.758e-03, 6.788e-02, -3.265e-01, -2.676e-01, -1.584e-02, 2.578e-01, -2.691e-02, -3.379e-02, -1.588e-02, -3.627e-03, -1.053e-01, -7.474e-02, 9.695e-02, 4.017e-02));
	r += mul(s0_7, M4(2.569e-02, -1.294e-01, -2.184e-01, -1.896e-01, 1.249e-01, -3.481e-01, 3.441e-01, -2.525e-01, -1.948e-02, 4.415e-02, -1.896e-01, -1.199e-01, -3.296e-02, 5.648e-04, 7.137e-02, -6.672e-02));
	r += mul(s0_8, M4(-1.059e-01, -1.555e-01, -4.792e-03, -2.355e-02, 8.325e-02, -1.706e-01, 2.737e-02, -3.699e-02, 5.793e-03, -3.858e-02, 7.466e-02, -2.144e-02, -1.472e-01, -8.354e-02, 9.293e-04, 7.899e-02));
	r += mul(s1_0, M4(1.214e-01, -3.888e-02, -7.692e-02, 7.518e-03, -1.114e-01, -6.412e-02, 3.779e-02, -3.651e-02, 6.508e-02, 4.873e-02, -8.752e-02, -2.748e-04, 1.274e-01, 2.137e-02, 3.784e-02, 1.540e-02));
	r += mul(s1_1, M4(9.708e-02, 5.635e-02, -8.596e-02, 9.531e-02, -2.201e-02, -6.158e-02, -9.603e-02, 4.481e-02, 1.933e-01, 5.793e-02, -8.643e-02, -1.559e-01, 1.082e-01, 9.472e-02, 1.581e-01, 2.260e-01));
	r += mul(s1_2, M4(-4.898e-02, -5.911e-03, 1.488e-02, 5.290e-02, 8.232e-02, -6.016e-03, 2.088e-02, -2.137e-03, -6.928e-02, 7.237e-02, 1.024e-01, 4.490e-02, 2.106e-01, 3.186e-02, 1.385e-01, 2.273e-01));
	r += mul(s1_3, M4(-2.274e-03, 1.202e-01, 8.561e-02, -2.359e-03, 9.927e-03, 6.843e-02, 3.887e-02, 6.791e-02, 2.428e-02, -5.698e-02, -5.294e-02, -1.431e-02, 1.391e-01, -1.181e-01, -8.015e-02, -1.303e-01));
	r += mul(s1_4, M4(1.543e-01, 2.311e-01, 5.523e-01, 4.505e-01, 1.687e-01, -1.044e-01, 2.146e-01, 7.037e-02, 3.646e-01, -8.474e-02, -2.911e-02, -3.091e-02, 1.659e-01, 1.867e-01, -1.452e-02, -9.702e-03));
	r += mul(s1_5, M4(-5.954e-02, 1.475e-01, -1.169e-01, -2.935e-02, -1.248e-02, -2.545e-02, 1.196e-01, 7.875e-02, -1.230e-01, -4.100e-02, -1.244e-01, 2.200e-02, -8.112e-02, 1.377e-02, 4.385e-02, 3.784e-01));
	r += mul(s1_6, M4(-6.024e-02, -1.752e-01, -4.626e-02, 1.655e-01, 1.731e-03, -7.482e-02, -1.530e-01, -7.325e-02, 5.236e-02, 1.229e-01, 3.696e-02, -4.321e-03, -1.850e-01, 2.204e-02, -2.633e-02, 3.600e-01));
	r += mul(s1_7, M4(2.696e-02, -1.389e-02, 4.563e-02, -7.437e-02, -3.164e-02, 3.326e-03, -5.309e-02, -3.683e-02, 1.613e-01, 1.284e-01, -2.614e-02, -8.940e-02, -1.848e-01, -4.281e-02, 6.911e-03, 1.952e-01));
	r += mul(s1_8, M4(6.517e-02, -1.205e-01, -1.575e-01, -2.235e-01, -3.822e-02, 8.424e-02, 3.246e-02, -2.478e-02, -7.266e-03, -6.509e-02, -1.253e-01, -4.156e-02, -1.074e-01, -9.996e-02, 6.088e-02, 2.346e-01));
	r += mul(s2_0, M4(7.127e-02, -1.396e-02, -6.454e-03, 2.312e-01, 2.245e-01, 1.260e-01, -5.369e-02, 6.271e-02, 5.813e-03, 1.763e-01, 5.081e-02, -1.903e-02, 9.205e-02, 3.677e-02, 1.465e-02, -3.572e-02));
	r += mul(s2_1, M4(-8.076e-02, 2.054e-01, -1.043e-01, 1.886e-02, 1.810e-01, 1.393e-01, 1.145e-01, 1.048e-01, 7.747e-02, 1.002e-01, 3.084e-02, -1.256e-01, 1.678e-01, -5.360e-02, 9.116e-03, -3.719e-02));
	r += mul(s2_2, M4(7.671e-02, -1.683e-01, 2.119e-02, -1.963e-01, 3.664e-02, -3.882e-02, -6.431e-02, 4.355e-02, 5.644e-03, -2.142e-02, 1.063e-02, -1.070e-02, -3.074e-03, 7.798e-02, 5.430e-02, -5.788e-02));
	r += mul(s2_3, M4(1.597e-01, 4.635e-03, -9.995e-02, -5.731e-02, 2.099e-01, -1.744e-01, -1.172e-01, -3.430e-02, 4.002e-02, -4.481e-02, -1.069e-01, 1.615e-03, -5.802e-03, 6.416e-02, -7.563e-03, -3.679e-03));
	r += mul(s2_4, M4(-7.730e-02, 8.141e-02, 2.194e-01, 3.742e-02, -5.233e-02, -1.086e-01, 2.415e-02, -2.193e-01, -4.474e-02, -4.683e-02, -5.579e-02, 8.826e-02, 3.113e-01, -9.740e-02, 2.551e-01, 4.137e-02));
	r += mul(s2_5, M4(-6.207e-02, 2.488e-02, 5.997e-02, -2.508e-01, 9.913e-02, 8.113e-02, 4.466e-02, 2.117e-01, -1.341e-02, 2.354e-02, 4.115e-02, 1.401e-02, -3.650e-02, -1.225e-01, -3.836e-02, 1.445e-01));
	r += mul(s2_6, M4(1.256e-01, -7.921e-02, 5.601e-02, 1.096e-01, -1.118e-01, 2.026e-02, -9.184e-02, 5.103e-02, -5.814e-02, -1.031e-01, -1.367e-01, -8.229e-02, -1.179e-01, 4.272e-02, -2.452e-03, 1.150e-01));
	r += mul(s2_7, M4(1.799e-01, 3.391e-02, 1.348e-01, 2.273e-01, 6.945e-02, 1.657e-01, 7.544e-02, -7.215e-02, -5.604e-02, -1.335e-02, 1.002e-01, 1.115e-01, 1.283e-01, 5.942e-02, -7.851e-02, 9.534e-02));
	r += mul(s2_8, M4(5.800e-02, -4.717e-03, -4.043e-02, -3.308e-01, 4.501e-02, -3.996e-02, -5.832e-02, -9.447e-02, -2.623e-03, 7.341e-03, 2.828e-02, 1.066e-02, -7.222e-03, -9.796e-02, -4.808e-02, 8.999e-03));
	r += mul(s3_0, M4(1.333e-01, 9.811e-02, 3.517e-02, 1.008e-02, 2.086e-01, -1.047e-01, -2.222e-02, 1.142e-01, 1.103e-01, 4.521e-02, 3.430e-01, 1.308e-01, -1.588e-02, 5.853e-02, -6.328e-02, -8.583e-02));
	r += mul(s3_1, M4(-1.590e-02, 5.338e-02, 1.783e-01, -3.995e-02, -2.121e-01, -2.104e-01, -8.402e-02, -6.910e-02, -2.198e-01, -1.369e-01, 4.857e-01, -3.527e-01, 5.229e-02, 4.553e-02, 3.975e-02, 1.113e-01));
	r += mul(s3_2, M4(1.378e-01, 5.009e-02, 1.147e-02, -4.811e-02, 1.995e-02, -4.220e-02, -1.434e-02, -1.682e-01, 1.375e-02, -2.277e-01, 4.034e-01, 1.324e-02, -1.265e-01, -4.825e-03, -5.124e-02, 4.928e-02));
	r += mul(s3_3, M4(3.023e-01, -1.505e-01, -1.060e-01, -2.507e-02, -1.052e-01, -4.072e-03, 9.374e-02, 1.291e-01, 2.717e-02, -2.332e-02, 1.779e-01, 2.247e-01, 4.461e-02, -3.617e-02, 1.135e-01, 1.578e-01));
	r += mul(s3_4, M4(6.922e-03, -7.092e-03, -7.514e-02, -1.999e-01, -7.352e-02, 9.282e-02, -4.062e-02, 4.427e-02, -2.842e-01, 5.538e-02, 1.728e-01, 6.405e-01, 5.666e-02, -1.584e-01, 2.390e-01, -1.563e-01));
	r += mul(s3_5, M4(-1.542e-01, 6.271e-02, -7.880e-02, -6.906e-02, 5.749e-02, 3.161e-01, -1.099e-01, -2.815e-01, -1.928e-01, 2.556e-02, 2.316e-01, 3.207e-01, -7.350e-02, -8.937e-02, -8.128e-02, -3.217e-02));
	r += mul(s3_6, M4(-1.054e-01, -6.319e-02, 1.393e-01, 1.755e-01, 1.463e-02, 4.238e-02, 1.288e-01, 2.410e-02, 6.348e-02, -9.399e-02, 5.054e-02, 1.567e-01, -9.147e-02, -9.821e-02, -5.046e-02, -9.996e-02));
	r += mul(s3_7, M4(4.389e-02, 1.437e-01, 1.209e-01, -1.093e-01, -3.484e-02, -9.388e-03, -1.517e-02, 2.933e-02, 2.747e-02, -2.815e-03, 2.313e-02, 4.478e-02, -2.331e-02, -2.325e-02, 4.810e-02, 3.544e-03));
	r += mul(s3_8, M4(1.622e-01, -7.435e-02, -2.240e-02, -1.503e-01, 9.484e-02, 7.273e-03, 1.479e-02, 5.303e-04, -3.781e-02, 1.331e-02, 2.239e-01, 6.487e-02, 3.113e-02, -1.109e-02, -7.214e-02, -5.213e-02));
	r += mul(s4_0, M4(-1.767e-01, -1.761e-02, 5.794e-02, -2.092e-01, -1.063e-01, -6.469e-02, -1.832e-01, -2.754e-01, 6.988e-02, 4.643e-02, -8.930e-04, 2.108e-02, -1.621e-01, 1.921e-03, 9.110e-03, 5.548e-02));
	r += mul(s4_1, M4(-3.711e-02, 4.007e-02, -2.122e-02, 1.410e-01, -4.001e-01, -2.502e-01, -3.357e-01, 1.560e-01, -6.828e-02, -2.182e-01, -2.920e-02, 7.451e-02, 2.454e-01, 5.595e-02, 1.532e-02, 2.326e-01));
	r += mul(s4_2, M4(-3.415e-02, 1.130e-01, 1.315e-01, 9.990e-02, 2.578e-01, 3.132e-02, -2.092e-02, -4.679e-02, 1.714e-01, -6.984e-02, -3.989e-02, 6.598e-03, 2.138e-02, 5.911e-02, -1.324e-01, 2.257e-02));
	r += mul(s4_3, M4(-3.899e-02, -2.317e-02, -5.292e-02, 1.811e-01, -1.660e-02, 3.567e-01, -3.313e-02, -1.344e-01, 3.119e-02, 1.979e-02, 1.413e-01, 2.558e-02, -2.075e-01, -5.004e-02, 1.268e-01, 4.136e-02));
	r += mul(s4_4, M4(-1.033e-01, 1.753e-01, -3.781e-02, -1.163e-02, -1.936e-02, -7.617e-02, -1.734e-01, 3.961e-01, 2.580e-02, 2.199e-02, 3.309e-02, -1.168e-04, -1.629e-01, 2.210e-01, -1.099e-01, -6.111e-02));
	r += mul(s4_5, M4(-1.314e-01, 1.596e-01, -4.496e-02, 3.517e-02, 3.716e-01, 1.264e-02, 1.556e-01, -1.134e-01, -4.923e-02, 8.925e-02, 5.157e-02, -1.111e-01, 1.235e-01, -5.074e-02, -9.965e-02, -2.354e-01));
	r += mul(s4_6, M4(-2.187e-01, -8.205e-02, -5.784e-02, -1.245e-01, -1.000e-01, 9.404e-02, 7.355e-02, -5.207e-01, 2.849e-02, 4.311e-02, -2.477e-03, -1.228e-01, -7.596e-02, 3.517e-02, 5.883e-02, 6.495e-02));
	r += mul(s4_7, M4(-1.546e-01, 1.033e-01, -8.544e-02, 1.434e-01, -2.261e-01, -1.058e-01, 7.453e-03, 2.644e-01, -1.821e-01, 3.247e-03, 1.335e-01, 1.569e-01, 1.079e-01, 1.831e-02, 7.809e-02, 7.146e-02));
	r += mul(s4_8, M4(-1.282e-01, 4.611e-02, 1.102e-01, 8.853e-02, -1.722e-01, -1.031e-01, 2.514e-01, -1.527e-01, -2.682e-03, 1.106e-01, -6.223e-03, -2.285e-02, 1.846e-01, 1.044e-02, -4.121e-02, -9.054e-02));
	r += mul(s5_0, M4(1.332e-01, -5.180e-02, 7.027e-03, 4.352e-02, -2.723e-02, -2.257e-02, -5.886e-02, -4.457e-02, -7.016e-02, 6.257e-02, 1.170e-02, 8.506e-02, -4.006e-02, -6.097e-02, 6.540e-02, 8.862e-02));
	r += mul(s5_1, M4(-4.496e-02, -5.056e-02, -1.407e-03, 1.206e-01, -4.892e-02, -8.885e-02, 6.538e-03, 4.501e-02, 4.362e-02, -8.527e-02, -1.946e-01, 1.466e-01, 1.045e-01, -1.014e-01, 1.647e-01, -2.132e-01));
	r += mul(s5_2, M4(1.616e-02, -1.825e-01, -2.892e-02, 3.381e-03, -2.863e-02, 4.838e-02, -1.173e-02, 5.060e-03, 1.067e-01, -1.108e-01, 1.812e-01, -3.503e-02, -1.645e-01, 6.200e-02, 4.331e-02, -1.618e-01));
	r += mul(s5_3, M4(-1.727e-01, -1.164e-01, -7.706e-02, 1.022e-02, -8.109e-02, 8.064e-02, 1.430e-01, -3.233e-02, -1.537e-01, 5.173e-03, -1.557e-01, 5.926e-02, -1.343e-01, -2.039e-03, -2.499e-02, -7.986e-02));
	r += mul(s5_4, M4(-2.449e-01, 8.561e-02, -5.706e-02, -2.523e-02, -7.240e-02, 4.196e-02, -1.457e-01, 1.570e-01, -7.590e-02, -5.256e-02, -1.481e-01, -2.097e-01, -2.887e-01, -5.940e-02, -2.843e-02, -4.960e-02));
	r += mul(s5_5, M4(-6.858e-02, 2.541e-02, -3.451e-02, -3.459e-01, 1.838e-02, -1.523e-03, 2.673e-02, -8.445e-02, 3.166e-04, 1.589e-02, -1.491e-01, -1.015e-01, -3.630e-01, -3.018e-01, 7.412e-02, 1.560e-01));
	r += mul(s5_6, M4(-1.742e-02, 1.457e-03, -1.061e-01, 1.826e-02, 6.549e-02, 1.906e-02, -3.508e-02, 2.989e-02, -4.677e-02, -1.683e-01, 2.756e-01, -2.025e-01, 1.932e-02, 1.945e-02, 2.062e-02, -2.864e-02));
	r += mul(s5_7, M4(1.323e-01, -1.310e-01, -4.512e-02, -4.500e-02, 1.232e-01, 1.050e-02, -1.799e-02, -1.608e-01, 1.488e-01, 1.833e-02, 3.364e-02, -1.157e-01, -8.935e-02, 6.832e-02, -2.770e-02, -4.232e-02));
	r += mul(s5_8, M4(8.221e-02, 8.575e-03, -4.399e-02, -2.292e-01, -4.005e-02, -7.331e-02, 7.642e-03, -1.209e-01, 6.489e-02, 1.665e-01, 5.812e-02, 1.639e-01, -1.728e-01, -4.343e-02, 1.431e-01, -1.113e-01));
	r += mul(s6_0, M4(-3.535e-02, 9.030e-02, -5.320e-02, 7.737e-02, -5.293e-03, -5.581e-03, -7.533e-02, -1.016e-01, 4.049e-02, 1.179e-01, 8.793e-02, 5.243e-02, -2.340e-01, -1.673e-01, -4.937e-02, -4.802e-02));
	r += mul(s6_1, M4(5.493e-02, -6.066e-02, -1.590e-01, 9.192e-02, -7.866e-02, -1.228e-01, -1.021e-01, 4.901e-02, -2.071e-02, 9.203e-02, 2.903e-01, 5.426e-02, 6.469e-02, 1.092e-01, 9.984e-02, -1.571e-01));
	r += mul(s6_2, M4(1.058e-01, 2.990e-02, 8.940e-03, 4.651e-02, 1.527e-01, -2.858e-02, 2.792e-02, 1.128e-01, -7.559e-02, -4.387e-02, 1.674e-01, 1.207e-01, -2.941e-02, 4.771e-03, -6.180e-02, -1.577e-01));
	r += mul(s6_3, M4(1.342e-01, 1.282e-01, 2.951e-02, -6.237e-02, 2.081e-01, 2.897e-02, 3.082e-01, 5.621e-02, -5.228e-02, -7.116e-03, -3.120e-02, 3.504e-02, -6.504e-02, 1.618e-01, 2.461e-02, -1.625e-01));
	r += mul(s6_4, M4(-1.675e-01, 2.803e-02, 3.666e-02, 2.285e-01, 4.541e-02, 1.696e-01, -3.614e-01, 6.305e-02, -1.967e-01, 1.952e-02, 1.416e-01, -1.762e-01, -7.810e-02, 1.376e-01, -7.025e-02, 1.066e-01));
	r += mul(s6_5, M4(1.353e-01, 1.662e-01, -4.207e-02, -2.346e-02, 3.252e-03, -4.560e-02, 1.697e-02, 2.627e-01, -1.936e-01, -9.696e-02, 2.661e-02, -7.479e-02, 1.646e-01, 1.714e-01, 1.544e-01, -3.598e-01));
	r += mul(s6_6, M4(1.437e-01, 3.831e-02, 6.898e-02, 2.046e-01, 7.234e-02, -1.429e-01, 1.993e-01, 1.094e-01, -4.150e-02, 2.519e-02, -7.068e-02, -8.525e-02, 2.540e-01, 6.760e-02, -1.217e-01, -2.109e-02));
	r += mul(s6_7, M4(1.491e-01, 1.031e-01, 1.842e-02, 7.649e-02, -1.016e-01, -1.476e-01, 1.011e-01, 3.578e-02, -1.648e-01, -4.999e-02, 1.983e-01, 1.706e-01, 1.083e-01, -4.382e-02, 3.283e-02, 2.631e-01));
	r += mul(s6_8, M4(1.966e-01, 3.838e-03, 3.439e-02, 5.648e-02, -5.775e-02, 4.761e-02, -2.427e-02, -6.857e-02, 9.181e-02, 4.807e-02, -4.314e-02, 1.334e-01, 1.542e-01, 1.521e-01, 7.161e-03, -1.978e-01));
	r += mul(s7_0, M4(1.146e-02, -6.005e-02, 5.545e-02, -9.761e-02, 1.503e-01, -2.184e-02, -2.481e-02, 7.512e-03, 1.664e-01, 1.335e-01, -1.812e-02, 7.166e-04, 1.337e-02, -2.437e-02, 1.074e-01, -1.082e-01));
	r += mul(s7_1, M4(-1.392e-01, -4.758e-02, -1.420e-01, -1.394e-01, 8.528e-02, 1.490e-01, -1.346e-02, -1.093e-02, -1.024e-02, -1.676e-01, 9.042e-02, -9.947e-02, -4.090e-02, -4.279e-03, 2.057e-01, 2.859e-02));
	r += mul(s7_2, M4(-1.675e-01, -2.194e-01, 5.117e-02, -1.949e-01, 1.652e-02, -9.819e-02, 5.137e-02, 4.510e-02, -1.041e-01, -8.313e-02, -8.920e-02, -1.221e-02, -8.391e-02, -7.470e-02, -3.483e-02, 2.702e-02));
	r += mul(s7_3, M4(1.624e-01, -1.387e-02, 1.789e-01, 4.982e-02, 2.285e-01, -1.281e-01, 1.582e-01, -1.199e-01, -1.904e-02, -2.422e-02, 1.750e-01, 1.218e-02, 8.004e-02, -1.072e-01, 3.704e-02, 1.066e-01));
	r += mul(s7_4, M4(-6.251e-03, -1.467e-02, 3.164e-02, 1.254e-01, -8.096e-02, 6.160e-02, -1.487e-01, -4.789e-03, 1.438e-01, -4.290e-02, 5.777e-02, -6.356e-02, 8.993e-02, -8.667e-02, -2.150e-01, 6.103e-02));
	r += mul(s7_5, M4(-1.233e-01, -1.353e-01, -8.816e-03, -1.068e-01, -3.542e-02, -6.869e-02, 1.217e-01, 2.516e-01, 1.305e-01, 1.769e-01, -3.911e-02, 3.566e-02, 4.754e-02, 1.708e-02, -5.803e-02, 1.294e-01));
	r += mul(s7_6, M4(-6.324e-02, -1.053e-01, 8.470e-02, 1.818e-02, 1.645e-02, -4.823e-02, 1.704e-01, -1.717e-02, -7.712e-02, 1.961e-02, -1.057e-01, -7.132e-02, 5.527e-02, -2.007e-02, 4.258e-02, -5.366e-02));
	r += mul(s7_7, M4(-7.590e-02, 1.014e-02, -4.700e-02, -8.332e-02, -2.349e-04, 8.786e-02, -9.277e-03, 8.736e-02, -2.654e-03, -1.076e-02, 6.248e-02, 1.686e-01, -2.289e-02, 8.949e-03, 3.939e-02, 2.480e-01));
	r += mul(s7_8, M4(-1.031e-01, -1.418e-01, -1.160e-02, -9.134e-02, -8.708e-02, -1.059e-02, 3.590e-02, 9.035e-02, 6.970e-02, 4.722e-02, -1.140e-01, -2.233e-02, 5.186e-02, -1.830e-03, 4.614e-02, -5.985e-02));
	// Constant bias added once after all weighted terms.
	r += V4(-6.233e-03, -4.512e-02, 8.188e-02, 1.874e-02);
	return r;
}

V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) {
	V4 r = 0.0;
	r += mul(s0_0, M4(1.072e-02, 2.987e-02, -4.870e-02, -9.877e-02, -2.593e-01, 8.765e-02, -3.330e-01, 4.828e-02, 3.932e-02, 3.659e-04, 1.314e-01, -1.119e-01, 2.564e-02, 5.181e-02, 7.177e-02, -2.318e-02));
	r += mul(s0_1, M4(1.366e-01, 8.945e-02, 6.219e-02, 1.591e-01, 1.157e-01, 1.207e-01, -1.396e-01, 1.469e-01, -1.020e-01, -1.702e-02, -3.086e-02, -8.716e-02, 5.325e-02, -7.046e-02, -5.272e-02, 1.926e-01));
	r += mul(s0_2, M4(9.012e-02, -1.006e-02, 5.874e-03, 1.501e-01, -5.434e-02, 9.288e-02, -3.788e-02, -1.272e-01, -6.693e-03, 5.686e-03, -3.368e-02, -2.107e-02, -3.497e-02, 3.170e-02, 5.741e-02, 5.688e-02));
	r += mul(s0_3, M4(-8.879e-02, 1.134e-01, -2.091e-01, -2.618e-02, -1.441e-01, -1.830e-01, 1.010e-01, -2.361e-02, 7.942e-02, -9.917e-02, 7.310e-02, 1.108e-01, -4.764e-02, 9.027e-03, 1.707e-01, 6.562e-02));
	r += mul(s0_4, M4(-1.312e-01, 2.961e-02, 3.481e-02, 8.557e-02, 7.619e-01, -4.109e-01, -7.920e-02, -6.275e-01, -3.851e-02, 1.703e-02, -1.155e-01, -6.941e-02, 8.292e-02, -7.877e-02, -1.201e-01, 8.829e-02));
	r += mul(s0_5, M4(6.816e-02, -1.082e-01, -1.076e-01, -2.286e-01, -2.404e-01, 3.053e-01, -1.037e-01, 1.641e-01, 1.430e-01, 1.705e-01, 8.834e-03, 8.574e-02, -4.016e-02, -2.470e-02, 7.121e-02, 1.250e-03));
	r += mul(s0_6, M4(6.383e-02, 1.785e-02, 1.217e-01, -5.187e-02, -1.436e-01, -1.256e-01, -9.063e-02, 1.767e-01, -3.213e-02, -2.595e-01, -1.391e-01, 8.152e-03, -9.418e-02, -2.258e-01, 2.352e-02, -5.914e-02));
	r += mul(s0_7, M4(2.106e-02, -1.326e-01, -1.596e-01, 1.914e-02, -8.504e-02, 6.304e-01, 1.081e-01, 2.286e-01, -1.086e-01, -1.806e-01, 1.410e-01, -3.709e-02, -4.926e-02, -1.029e-01, -9.883e-02, -2.675e-02));
	r += mul(s0_8, M4(-7.430e-03, 2.211e-02, -1.627e-02, -3.563e-02, -1.603e-02, -1.272e-01, -7.611e-02, 1.647e-01, -2.259e-01, 5.723e-02, -1.199e-01, 9.717e-03, -1.614e-02, -3.597e-02, -2.514e-02, -1.460e-01));
	r += mul(s1_0, M4(1.850e-02, 7.559e-02, 2.411e-01, -5.024e-02, -2.092e-03, 8.279e-03, 1.779e-01, 7.559e-02, 3.466e-02, -1.003e-01, 6.475e-02, -6.812e-02, 7.454e-02, -1.681e-01, 8.178e-02, 5.003e-02));
	r += mul(s1_1, M4(-7.757e-02, 1.018e-01, -4.131e-02, -2.083e-01, 2.325e-02, 1.465e-02, -4.361e-02, 8.990e-02, -1.067e-01, -1.373e-01, 4.339e-02, 1.775e-01, -4.043e-02, -1.337e-01, 9.192e-03, 8.997e-02));
	r += mul(s1_2, M4(-1.574e-01, -1.295e-01, -6.844e-02, -9.125e-02, -8.375e-03, 6.014e-02, 2.392e-02, -4.710e-02, -2.392e-02, -1.597e-01, 2.631e-03, -3.381e-03, 1.888e-01, -2.238e-01, -3.792e-02, -1.087e-01));
	r += mul(s1_3, M4(-5.730e-02, 1.616e-01, -1.510e-01, 1.484e-01, 2.380e-03, -5.401e-02, 3.699e-02, -4.056e-02, 1.217e-01, 2.217e-02, -4.013e-02, 1.260e-01, -1.723e-02, -2.650e-01, 2.407e-01, -6.329e-02));
	r += mul(s1_4, M4(-3.091e-01, 2.740e-01, -1.965e-02, 2.077e-02, 1.116e-01, -9.509e-02, -2.301e-01, -7.686e-02, -5.567e-02, 6.194e-02, -2.114e-02, -5.373e-02, 1.820e-01, -3.116e-01, -2.269e-01, 7.808e-02));
	r += mul(s1_5, M4(2.357e-01, 3.560e-02, -6.030e-02, -2.233e-01, 1.096e-01, 5.193e-02, -2.080e-01, 2.401e-02, 7.055e-02, -2.879e-02, 4.936e-02, 1.033e-01, -9.082e-02, 1.456e-02, -1.203e-01, -9.185e-02));
	r += mul(s1_6, M4(-4.888e-03, -1.463e-02, -1.374e-01, -7.310e-02, -8.804e-02, -1.713e-03, 1.975e-01, -7.981e-02, 1.088e-01, -3.366e-02, -2.186e-01, 5.982e-03, -5.231e-02, -8.029e-02, 1.584e-01, 1.171e-02));
	r += mul(s1_7, M4(-2.043e-01, -2.478e-01, -1.684e-01, 7.396e-02, -5.522e-02, 1.367e-02, 2.299e-02, -6.665e-02, -7.718e-02, 7.285e-02, 1.271e-01, 8.786e-02, 1.472e-01, 7.952e-02, -2.088e-01, -7.105e-02));
	r += mul(s1_8, M4(-2.984e-02, -9.663e-02, -6.416e-03, 4.946e-02, -5.698e-02, 5.056e-02, 4.170e-02, 7.171e-03, 1.453e-01, 3.092e-02, -5.553e-02, -6.796e-03, 1.211e-01, 1.874e-02, -1.011e-01, -3.792e-02));
	r += mul(s2_0, M4(-1.008e-01, -5.194e-02, -2.756e-02, -6.745e-03, -1.505e-01, -1.320e-01, -1.929e-01, 4.847e-02, -1.121e-01, -2.102e-01, -1.006e-01, 1.444e-02, -8.762e-02, -8.098e-03, -7.473e-02, -1.624e-01));
	r += mul(s2_1, M4(1.426e-01, 1.293e-01, -1.464e-02, -1.140e-02, -2.014e-02, 4.292e-02, -2.943e-01, -1.110e-01, 1.252e-01, -5.200e-02, 1.308e-01, 6.224e-02, 7.254e-02, -2.254e-02, 7.862e-02, -2.198e-01));
	r += mul(s2_2, M4(4.654e-02, -2.551e-01, -2.251e-01, 2.466e-01, -1.344e-01, -4.730e-02, -1.331e-01, 8.710e-02, -2.163e-01, -1.685e-01, 2.531e-02, 4.731e-02, 3.408e-02, 1.302e-02, -4.481e-02, -7.309e-02));
	r += mul(s2_3, M4(-1.833e-01, 1.709e-01, 5.242e-02, -2.393e-02, 5.576e-02, 6.886e-02, 4.389e-01, -4.525e-02, 7.399e-02, 1.564e-01, 9.747e-03, -1.112e-01, -2.863e-02, 2.534e-01, 1.318e-01, 1.231e-01));
	r += mul(s2_4, M4(1.389e-02, -2.544e-01, 6.263e-02, 4.152e-02, -4.439e-02, 9.213e-02, 1.155e-01, -3.731e-02, 4.804e-02, 6.394e-02, 3.836e-02, -1.901e-02, 3.252e-01, -1.103e-01, 3.155e-01, -3.334e-01));
	r += mul(s2_5, M4(2.692e-02, 1.670e-03, -1.956e-01, -2.065e-01, 2.862e-02, 1.070e-01, -5.289e-04, 2.543e-01, 3.560e-03, 3.700e-02, 2.995e-02, -1.575e-01, -2.839e-02, -1.122e-02, -1.519e-01, -3.399e-01));
	r += mul(s2_6, M4(-3.126e-02, 6.623e-02, 1.242e-01, 1.889e-02, -4.032e-02, 8.464e-03, 1.190e-01, 4.678e-02, -1.823e-02, -1.588e-02, -2.659e-02, 6.273e-02, -7.421e-02, -1.694e-01, 1.068e-01, -7.284e-03));
	r += mul(s2_7, M4(1.102e-01, -1.866e-01, -1.061e-01, -2.824e-01, 1.737e-02, 1.007e-01, 5.043e-03, -1.614e-01, 3.293e-03, 1.382e-01, -1.468e-02, -1.901e-02, -3.788e-02, 9.288e-02, -1.407e-01, -2.130e-02));
	r += mul(s2_8, M4(2.724e-02, 6.120e-02, 1.856e-01, 1.509e-02, 1.522e-01, 4.458e-02, 1.284e-01, 1.431e-01, -1.399e-02, -4.098e-02, 6.481e-02, -6.521e-02, 1.377e-01, -2.937e-02, 2.207e-02, 3.419e-02));
	r += mul(s3_0, M4(1.091e-02, -8.703e-02, 2.567e-01, 3.758e-02, -1.626e-01, 9.900e-02, 9.897e-02, 1.972e-02, -1.741e-01, -3.328e-01, 9.653e-02, -7.223e-02, -7.728e-02, 1.670e-01, 1.268e-01, -1.022e-02));
	r += mul(s3_1, M4(8.696e-02, 4.460e-02, -1.843e-01, 5.664e-02, 2.322e-01, 2.416e-01, 4.537e-02, -8.637e-02, 2.466e-01, -3.734e-01, -1.130e-01, -4.065e-02, 6.738e-02, 1.748e-01, 5.445e-02, -1.321e-01));
	r += mul(s3_2, M4(4.309e-02, 1.146e-01, 1.336e-03, -1.639e-01, 8.132e-02, 1.614e-01, 4.700e-02, -3.760e-01, -2.383e-01, 1.015e-01, 1.638e-02, 3.249e-01, -3.819e-02, 4.556e-02, -5.602e-02, 2.362e-01));
	r += mul(s3_3, M4(-1.616e-01, 1.037e-01, 4.294e-01, -8.711e-02, 9.881e-02, -1.627e-01, -4.622e-02, -3.638e-02, -1.581e-01, -2.005e-01, -2.844e-01, -1.328e-01, -5.698e-02, 1.438e-02, 2.079e-01, 1.697e-02));
	r += mul(s3_4, M4(-4.821e-02, -3.568e-02, -1.423e-01, -7.904e-02, 1.332e-01, 7.288e-03, 8.952e-02, -5.698e-02, 5.983e-02, 2.679e-01, -9.634e-02, 6.227e-02, 2.570e-01, -2.033e-02, -3.207e-01, -4.696e-02));
	r += mul(s3_5, M4(2.321e-01, 1.473e-02, 4.822e-02, -1.961e-01, 3.712e-01, 1.017e-02, -1.449e-02, 9.831e-02, 8.188e-02, 2.311e-01, 1.631e-01, -5.482e-02, -5.875e-02, -2.277e-01, 2.308e-02, -5.562e-02));
	r += mul(s3_6, M4(3.540e-03, 1.396e-01, -7.204e-02, 3.212e-02, -5.876e-02, 5.417e-02, -1.013e-01, -1.313e-02, -1.003e-01, -3.366e-01, 9.102e-02, 4.483e-02, -1.129e-01, -8.134e-02, 1.506e-01, -4.069e-02));
	r += mul(s3_7, M4(-4.611e-02, 6.878e-03, -1.654e-02, 4.913e-02, -5.735e-02, 1.157e-01, -1.774e-02, -8.984e-02, 2.189e-02, 2.690e-01, 2.728e-02, -6.942e-03, 5.850e-02, -9.036e-02, -1.306e-01, -4.179e-03));
	r += mul(s3_8, M4(-8.081e-02, -5.007e-02, 5.750e-02, 5.247e-02, -2.169e-02, 1.248e-01, 1.077e-05, 1.078e-01, -1.131e-01, -3.974e-02, 2.086e-01, -1.031e-02, 9.351e-02, -1.307e-01, -2.288e-01, 4.383e-02));
	r += mul(s4_0, M4(3.424e-02, 7.999e-02, 7.685e-02, 4.790e-03, -6.946e-02, 3.152e-01, -1.126e-01, 2.237e-01, -2.916e-02, 4.300e-02, 3.158e-02, 2.072e-02, 1.954e-02, 1.402e-01, 1.155e-01, -5.400e-02));
	r += mul(s4_1, M4(4.558e-02, -1.627e-01, 1.460e-01, -1.519e-01, 1.269e-01, -1.166e-01, 7.981e-02, -1.032e-01, -1.566e-02, -7.502e-02, 1.240e-02, -6.222e-02, -1.907e-02, 1.501e-01, 1.784e-01, -2.211e-01));
	r += mul(s4_2, M4(1.578e-02, 2.068e-02, 1.515e-02, -2.311e-02, -2.813e-02, 1.740e-01, 1.060e-01, -7.887e-02, 2.634e-02, 7.770e-02, 2.411e-02, 5.767e-02, 9.125e-02, -2.644e-02, -2.305e-02, -9.402e-02));
	r += mul(s4_3, M4(1.087e-01, -2.020e-01, 1.134e-01, 2.271e-02, -7.384e-02, 1.375e-01, -3.297e-01, 6.567e-02, -2.486e-02, 1.378e-01, -2.338e-03, 1.319e-01, 6.882e-02, 1.297e-01, -6.683e-02, 1.000e-01));
	r += mul(s4_4, M4(-2.737e-01, 1.427e-01, -3.568e-01, 3.743e-02, 2.118e-01, -8.814e-03, 2.406e-01, 9.181e-02, -4.541e-02, 8.189e-03, 1.395e-01, 4.992e-02, -7.127e-02, 1.991e-01, 3.851e-01, 1.538e-01));
	r += mul(s4_5, M4(1.133e-01, -3.303e-02, 4.544e-02, -1.079e-02, -6.669e-02, -1.649e-01, 1.830e-01, 5.873e-02, -6.171e-02, 6.666e-02, 9.514e-02, -1.057e-01, 1.724e-01, 8.855e-02, 6.628e-02, -9.804e-02));
	r += mul(s4_6, M4(-1.156e-01, 4.579e-02, -1.436e-01, -1.500e-01, 1.156e-04, 1.470e-01, 2.128e-02, 1.526e-02, -2.010e-02, -4.426e-02, -2.241e-02, 1.347e-02, -3.333e-02, -8.785e-03, -1.368e-01, 3.007e-02));
	r += mul(s4_7, M4(3.182e-02, -3.640e-02, -2.042e-02, 6.383e-03, 1.342e-01, -1.910e-01, -2.039e-01, -2.644e-01, -5.062e-02, -6.343e-02, -1.043e-02, -7.188e-02, -1.742e-01, -5.018e-02, 3.093e-02, -2.548e-02));
	r += mul(s4_8, M4(2.927e-02, 8.337e-02, -4.069e-03, -4.532e-02, -1.445e-02, -1.190e-02, 5.480e-02, -2.169e-01, 6.353e-02, -2.374e-01, -1.116e-01, 2.739e-02, -9.849e-04, -9.187e-02, 2.816e-02, 1.036e-01));
	r += mul(s5_0, M4(-6.044e-02, 8.869e-02, 1.616e-03, -5.585e-02, 5.658e-02, 9.891e-03, -1.156e-02, 1.706e-02, 7.989e-03, 3.445e-01, -3.770e-02, 1.117e-01, -1.178e-01, -1.527e-01, -2.576e-01, -1.498e-01));
	r += mul(s5_1, M4(-6.545e-02, -9.450e-03, -6.159e-02, -4.696e-02, 8.623e-02, -7.169e-02, -5.838e-02, 1.784e-02, 1.132e-01, 1.126e-01, 1.970e-02, 1.624e-01, -4.806e-02, -2.634e-01, -1.809e-01, -3.138e-01));
	r += mul(s5_2, M4(1.511e-01, 9.117e-02, -5.241e-02, -6.197e-02, -6.328e-02, -1.878e-02, -2.180e-02, -4.510e-02, -1.126e-01, 1.166e-01, 1.270e-01, 1.281e-01, -8.226e-02, -7.153e-02, 3.331e-02, -1.265e-01));
	r += mul(s5_3, M4(7.965e-02, -1.161e-02, 1.187e-01, 9.454e-02, -2.232e-02, -1.270e-02, -4.553e-02, 4.193e-02, -1.702e-01, 6.725e-02, -2.090e-01, -3.786e-02, 1.777e-01, -1.197e-01, 2.123e-01, 9.745e-02));
	r += mul(s5_4, M4(2.837e-02, -3.252e-01, 3.035e-01, 1.311e-01, 5.649e-02, -2.523e-02, 3.847e-02, 1.933e-01, -1.843e-01, 8.062e-02, 2.911e-02, -1.414e-01, 1.123e-01, 1.763e-01, -1.875e-01, 1.437e-01));
	r += mul(s5_5, M4(1.268e-01, -1.762e-01, 3.054e-01, -1.945e-01, -8.692e-02, -5.161e-04, 7.561e-02, 3.989e-02, 1.580e-01, -2.259e-01, 1.818e-01, -2.756e-01, -2.821e-01, -5.012e-02, -1.082e-01, -2.341e-01));
	r += mul(s5_6, M4(-7.304e-02, 9.063e-03, -1.128e-01, -9.447e-02, 1.103e-01, -2.242e-01, -2.195e-01, -2.092e-02, -5.718e-02, -9.265e-03, -1.021e-02, 9.082e-02, 6.298e-02, 8.127e-02, 1.945e-01, 6.062e-02));
	r += mul(s5_7, M4(1.440e-01, -1.044e-01, -1.070e-01, -3.389e-02, -2.963e-02, 5.793e-02, -3.072e-02, -2.344e-03, -2.492e-02, -2.873e-01, -3.807e-01, 2.718e-01, -2.020e-02, 1.431e-01, -1.180e-01, -1.030e-01));
	r += mul(s5_8, M4(3.213e-01, 6.824e-03, -7.111e-02, 3.511e-01, -4.654e-02, -6.974e-02, 6.345e-02, 1.485e-02, 2.044e-01, -1.140e-01, -4.490e-02, 1.607e-01, -9.209e-02, 1.401e-01, -5.090e-02, -8.411e-03));
	r += mul(s6_0, M4(6.762e-02, 3.111e-02, 8.905e-02, -8.417e-02, 3.567e-02, 4.367e-02, 1.799e-01, -6.583e-02, 2.586e-01, -5.969e-02, 9.891e-02, -6.286e-02, 2.217e-01, 9.322e-02, 1.010e-02, 4.488e-03));
	r += mul(s6_1, M4(-6.680e-02, 3.286e-02, 3.087e-02, -4.619e-03, -1.036e-02, 7.282e-02, 1.468e-01, -5.122e-02, 2.252e-01, 1.292e-01, -7.800e-02, -1.970e-01, -9.419e-02, 9.136e-02, -4.977e-02, 2.149e-01));
	r += mul(s6_2, M4(-4.593e-03, 3.990e-02, 1.935e-01, 1.642e-01, -1.323e-01, -3.260e-02, 6.514e-03, 8.888e-02, 1.945e-01, -2.007e-01, -1.095e-01, 8.508e-02, 2.230e-01, 1.375e-01, 5.262e-01, 3.002e-02));
	r += mul(s6_3, M4(-1.067e-01, 1.100e-02, -9.022e-02, -8.760e-03, -1.470e-01, 4.327e-02, -4.423e-01, -9.102e-02, -7.296e-02, -5.397e-02, 3.183e-01, -9.107e-02, 7.823e-02, 1.340e-01, -3.831e-01, 3.284e-01));
	r += mul(s6_4, M4(-1.125e-01, -1.064e-01, 5.733e-02, 2.783e-01, 4.943e-02, 8.598e-02, -1.754e-01, 1.106e-01, 7.048e-03, -1.128e-01, -1.402e-01, 7.994e-02, 6.531e-02, -1.963e-01, -1.339e-01, -1.787e-01));
	r += mul(s6_5, M4(6.126e-02, -2.194e-01, -1.802e-02, -1.396e-01, -1.366e-01, 1.403e-03, -2.045e-02, -4.980e-03, 3.831e-03, -5.249e-02, -8.786e-02, -9.294e-02, 5.863e-02, 2.601e-01, 1.074e-01, 2.285e-01));
	r += mul(s6_6, M4(4.155e-02, -2.726e-02, -1.368e-01, -3.527e-02, -1.657e-03, 4.799e-02, -1.674e-01, -2.810e-02, 1.697e-01, -2.212e-01, -1.978e-01, -8.034e-02, -3.231e-02, -3.516e-01, 1.731e-01, -2.747e-02));
	r += mul(s6_7, M4(1.853e-01, 6.818e-02, 4.581e-02, 2.965e-02, 2.423e-01, -1.597e-01, 1.566e-01, 2.783e-01, 4.119e-02, -5.123e-02, 2.303e-01, -9.911e-02, -9.150e-02, -6.106e-02, 1.850e-02, 2.050e-01));
	r += mul(s6_8, M4(2.283e-01, -1.458e-01, 1.233e-01, 8.499e-02, 6.123e-02, 1.016e-01, -5.853e-03, -8.747e-02, 7.646e-02, -6.887e-02, -2.715e-02, -3.436e-02, -1.119e-01, -1.890e-01, 2.693e-01, 2.559e-03));
	r += mul(s7_0, M4(1.384e-02, -1.850e-01, 5.129e-02, -1.023e-01, 9.918e-02, -2.892e-02, 1.510e-01, -8.043e-02, -5.683e-02, 4.200e-02, -1.634e-01, 4.746e-02, 3.527e-02, -5.455e-02, -1.567e-01, -1.112e-01));
	r += mul(s7_1, M4(-1.209e-01, -2.167e-01, -9.072e-02, 6.864e-02, -1.791e-01, -7.372e-02, 7.429e-02, -2.261e-01, 5.672e-02, -8.587e-02, -1.480e-01, 8.569e-02, -4.476e-02, 4.380e-02, -2.727e-01, 2.679e-01));
	r += mul(s7_2, M4(-9.439e-02, -1.420e-01, -8.216e-02, 1.063e-01, -2.824e-02, -1.300e-01, 4.051e-02, -3.691e-02, -3.306e-02, -6.779e-02, -1.291e-02, 1.214e-01, -1.861e-02, -7.860e-02, 6.709e-02, -1.240e-01));
	r += mul(s7_3, M4(-4.692e-02, 4.636e-03, -3.419e-02, 3.887e-02, -9.496e-02, -5.115e-02, -1.757e-01, 8.311e-02, -8.690e-03, 5.378e-02, 2.403e-01, -1.772e-02, 8.144e-03, 1.039e-02, -1.007e-02, 2.934e-02));
	r += mul(s7_4, M4(-8.310e-02, -1.082e-01, 1.465e-01, 1.458e-01, -6.636e-02, 1.827e-01, -1.259e-01, 7.035e-02, 1.577e-01, 2.910e-02, 1.289e-01, 1.968e-01, 6.651e-02, -1.057e-01, 3.715e-02, -2.438e-01));
	r += mul(s7_5, M4(-2.911e-02, -9.885e-02, -2.341e-02, -2.019e-01, 2.090e-01, 8.860e-02, -1.892e-01, 1.353e-01, -5.556e-02, -7.870e-03, 5.824e-02, 5.973e-02, -3.291e-02, 7.454e-03, 3.751e-02, -1.121e-01));
	r += mul(s7_6, M4(7.551e-02, -3.335e-02, -1.762e-01, 5.832e-02, 5.756e-02, -1.613e-02, 1.464e-01, -9.083e-02, 1.369e-02, -1.940e-02, 2.754e-02, -5.272e-02, -4.991e-02, 1.147e-01, -2.800e-02, -5.788e-02));
	r += mul(s7_7, M4(-2.630e-02, 8.548e-02, -3.191e-02, 1.016e-01, 2.250e-01, 8.246e-02, 1.763e-01, 8.115e-03, -1.055e-01, 6.761e-02, 2.263e-02, -9.174e-02, 4.222e-02, 7.305e-02, 1.293e-01, -2.264e-03));
	r += mul(s7_8, M4(3.674e-02, -2.297e-02, -3.111e-02, 1.134e-02, -1.363e-01, 1.643e-01, 9.281e-02, -7.981e-02, 8.216e-02, -9.746e-02, -1.500e-02, 7.859e-02, -8.993e-02, -1.750e-02, 1.173e-01, 4.751e-03));
	r += V4(1.698e-03, 2.579e-02, -1.678e-02, 7.305e-03);
	return r;
}

// f2: builds one V4 (min16float4) result as the sum of 72 vector-by-matrix
// products plus a constant offset vector.
//
// Parameters: 72 V4 values named sN_K with N = 0..7 and K = 0..8. Each one is
// multiplied by its own constant 4x4 matrix (M4) and added to the accumulator,
// in the order s0_0 .. s0_8, s1_0 .. s1_8, ..., s7_0 .. s7_8.
// NOTE(review): the naming suggests N selects an input feature group and K a
// position inside a 3x3 neighbourhood -- confirm against the call site, which
// is not visible from this function.
//
// Returns: the accumulated V4. No activation or clamping is applied here; the
// caller receives the raw sum.
//
// NOTE(review): the matrix and offset constants look like generated (trained)
// weights -- presumably they should be regenerated rather than edited by hand.
// The accumulation happens in min16float, so reordering the additions may
// change rounding.
V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) {
	// Accumulator; the scalar 0.0 initialises all four components.
	V4 r = 0.0;
	// mul(vector, matrix): each input V4 is combined with its constant M4 and
	// summed into r. One line per input, in parameter order.
	r += mul(s0_0, M4(-6.186e-02, 7.644e-02, -1.504e-01, 6.998e-03, 3.227e-01, 5.705e-01, 1.276e-02, -9.864e-03, 2.392e-01, 1.188e-02, 2.665e-02, 4.136e-03, 1.107e-01, -1.087e-02, -3.264e-02, -1.099e-01));
	r += mul(s0_1, M4(5.791e-02, -8.280e-02, -2.316e-01, -1.607e-01, -4.218e-01, 6.681e-01, 1.097e-01, -7.250e-01, -1.247e-01, -1.116e-01, 6.560e-02, -9.282e-02, -3.846e-02, 4.643e-02, -2.071e-02, -2.146e-01));
	r += mul(s0_2, M4(3.270e-03, 3.178e-02, -1.218e-01, 5.875e-02, 2.629e-03, 2.297e-01, 4.372e-01, -2.591e-01, 2.010e-02, -6.648e-02, 2.203e-02, -3.337e-02, 5.933e-02, -9.940e-02, -8.647e-02, -3.455e-02));
	r += mul(s0_3, M4(-7.295e-02, 1.090e-01, -2.574e-01, -4.590e-02, 1.968e-01, 5.900e-01, 6.299e-02, -3.414e-02, 1.580e-01, -7.376e-02, 9.417e-02, -4.065e-02, 1.430e-03, -1.640e-02, 4.694e-02, -3.214e-05));
	r += mul(s0_4, M4(5.960e-02, -1.084e-01, -2.587e-01, 1.333e-01, -7.512e-02, 8.546e-01, -3.336e-01, 3.835e-01, 1.885e-01, 5.548e-02, -6.802e-02, 5.619e-03, -1.972e-02, 1.343e-01, 8.009e-02, -2.308e-01));
	r += mul(s0_5, M4(7.935e-02, -2.185e-02, 9.521e-03, 2.267e-02, 4.557e-02, 4.106e-01, -5.828e-02, 1.617e-01, -8.067e-02, 1.044e-01, -5.421e-02, 2.106e-01, 9.214e-02, -9.703e-03, -6.816e-03, 5.811e-02));
	r += mul(s0_6, M4(-8.258e-02, 5.813e-02, -3.165e-01, -4.061e-02, -2.125e-01, 3.517e-01, 1.852e-01, 3.948e-02, -3.181e-02, -9.381e-02, 3.857e-03, 1.596e-02, 1.605e-01, 1.763e-02, 6.127e-02, 5.993e-02));
	r += mul(s0_7, M4(-1.638e-01, 6.370e-02, -3.140e-02, -6.196e-02, -8.679e-02, 3.919e-01, -7.714e-02, 6.702e-01, 2.352e-01, 4.334e-02, 2.054e-01, -1.406e-01, -1.907e-02, -1.223e-02, 7.092e-02, 5.143e-02));
	r += mul(s0_8, M4(-2.864e-02, 2.101e-02, -5.985e-02, -3.778e-02, 1.740e-01, 4.349e-01, 3.508e-01, -8.256e-02, 1.417e-01, -2.963e-02, 5.655e-02, 5.685e-02, -3.032e-02, -7.560e-03, -3.227e-02, 1.275e-01));
	r += mul(s1_0, M4(-1.037e-01, -5.329e-02, -1.153e-03, 1.188e-03, 4.432e-02, 9.465e-02, 3.873e-02, -1.126e-02, 9.906e-02, -6.669e-03, -3.942e-02, -4.308e-02, 8.005e-03, 3.278e-02, -6.866e-02, -4.305e-02));
	r += mul(s1_1, M4(2.644e-01, 3.882e-02, 1.599e-01, 2.417e-01, -1.866e-01, -5.371e-02, -3.262e-02, -1.531e-01, -9.844e-02, -1.569e-02, -1.239e-02, -1.319e-02, -7.975e-02, 2.914e-02, 9.776e-02, 1.726e-01));
	r += mul(s1_2, M4(-7.199e-03, -9.698e-02, 5.974e-02, 2.839e-02, 1.722e-02, 1.038e-02, 4.286e-02, -1.109e-01, -1.882e-01, -5.766e-02, -1.242e-01, 6.661e-02, -9.155e-02, 9.355e-02, 4.687e-02, -7.348e-03));
	r += mul(s1_3, M4(-2.169e-01, 1.116e-01, -1.917e-01, -3.858e-03, 7.152e-02, -2.615e-02, -7.558e-02, -8.680e-02, -3.220e-02, 1.908e-01, 1.524e-02, 4.523e-03, 8.582e-02, 5.460e-02, 1.954e-01, 2.942e-01));
	r += mul(s1_4, M4(3.139e-02, -9.837e-02, 1.743e-02, 1.242e-01, 8.060e-02, 1.809e-01, -1.421e-01, 1.008e-01, -1.521e-01, 6.146e-02, -8.842e-02, 2.465e-01, -3.182e-01, -4.065e-02, 1.646e-01, -3.455e-01));
	r += mul(s1_5, M4(5.098e-02, -2.421e-02, 1.039e-01, 6.447e-02, -1.005e-01, 1.246e-02, -3.039e-03, 4.305e-02, -6.107e-02, 2.312e-02, -9.916e-02, 1.579e-01, 8.034e-03, 4.809e-02, 4.289e-02, -3.629e-02));
	r += mul(s1_6, M4(2.395e-01, -3.280e-02, -8.095e-02, -1.944e-02, 5.828e-02, 5.073e-02, -2.697e-02, 1.291e-02, -2.329e-01, 1.244e-01, -9.033e-02, 9.723e-02, 7.572e-03, 4.856e-02, -1.690e-01, 1.099e-01));
	r += mul(s1_7, M4(2.168e-02, 3.986e-02, 2.528e-01, -1.322e-01, 9.191e-02, -7.203e-02, -4.302e-02, 1.143e-01, -2.355e-02, 2.543e-03, -4.589e-03, -8.326e-02, -1.032e-01, -1.386e-01, -1.470e-01, 9.319e-02));
	r += mul(s1_8, M4(-4.893e-02, -1.262e-04, 1.475e-01, -7.349e-02, -1.439e-02, -1.244e-03, 7.020e-02, 1.256e-01, 1.024e-01, -4.272e-02, -5.351e-02, 2.883e-02, -6.076e-02, -1.863e-02, -1.155e-01, -1.205e-03));
	r += mul(s2_0, M4(1.458e-02, -5.736e-02, 1.231e-02, 1.840e-01, -1.126e-01, -4.911e-02, -1.182e-01, 1.364e-01, 1.724e-03, 7.908e-02, 6.441e-02, -1.983e-01, -8.041e-02, -3.410e-02, -9.758e-02, -3.682e-02));
	r += mul(s2_1, M4(-1.064e-01, 1.305e-02, 1.300e-02, -2.330e-01, 1.571e-01, -5.209e-02, -1.620e-01, -8.339e-02, -5.291e-02, 6.925e-02, -1.189e-02, 1.150e-01, 1.358e-04, -1.515e-01, 2.310e-02, 8.484e-02));
	r += mul(s2_2, M4(-1.471e-02, 8.768e-02, 1.317e-01, 6.032e-02, -2.307e-02, 4.888e-02, 4.469e-02, -2.151e-01, -1.492e-01, 1.163e-01, 7.104e-02, 8.186e-02, 8.922e-02, -5.862e-02, -9.091e-02, 2.635e-01));
	r += mul(s2_3, M4(7.698e-02, 4.197e-02, -4.196e-02, 1.545e-01, 1.266e-02, -1.129e-01, 1.528e-01, -5.952e-02, 3.781e-02, 8.110e-02, 1.030e-01, -2.405e-02, -5.800e-02, -1.409e-02, -6.333e-02, -2.070e-02));
	r += mul(s2_4, M4(-9.783e-02, 6.081e-02, -6.229e-02, 7.337e-02, -1.360e-01, 1.127e-03, 4.817e-02, -2.041e-01, 6.752e-02, -1.186e-02, -7.077e-02, -1.223e-01, 2.327e-02, 7.801e-03, 6.270e-02, 3.135e-01));
	r += mul(s2_5, M4(9.916e-02, -4.970e-02, 4.994e-02, -7.438e-02, 1.170e-01, 9.513e-02, -1.987e-01, 1.479e-01, -1.008e-01, 1.637e-02, -4.408e-02, 1.639e-01, 1.416e-02, 7.802e-02, 7.414e-02, 2.309e-01));
	r += mul(s2_6, M4(2.317e-02, -9.267e-03, 2.679e-03, -1.355e-02, 1.708e-01, 9.609e-02, 7.928e-02, -2.145e-03, 2.286e-01, 5.851e-02, 7.349e-02, -4.331e-02, 4.207e-02, 5.328e-03, -1.931e-01, 1.520e-02));
	r += mul(s2_7, M4(-4.572e-02, -5.036e-02, 4.120e-02, -2.082e-01, -1.949e-01, -9.669e-02, 4.695e-02, 2.991e-02, -6.106e-03, 6.338e-02, -1.248e-02, 3.833e-02, 1.196e-01, 3.891e-02, 1.696e-01, 3.910e-02));
	r += mul(s2_8, M4(1.415e-03, 5.763e-02, -1.149e-02, -2.407e-01, 1.740e-03, -8.224e-02, -1.945e-02, 1.355e-02, -4.428e-02, 1.613e-02, -3.505e-02, -4.122e-02, -2.886e-02, -6.491e-02, -5.194e-02, -8.513e-02));
	r += mul(s3_0, M4(-1.765e-01, -1.367e-01, -4.111e-02, 1.888e-02, -4.251e-03, 1.376e-02, -3.862e-02, -8.980e-03, 1.037e-01, -1.789e-01, 9.146e-02, 2.300e-01, -1.614e-01, 1.308e-01, -1.819e-01, 1.665e-01));
	r += mul(s3_1, M4(-2.059e-01, -5.625e-02, 2.864e-02, -7.048e-02, -1.706e-03, -1.770e-01, -4.874e-02, -2.786e-02, -1.275e-01, -5.649e-02, 3.366e-02, 1.795e-01, -1.223e-02, -2.242e-01, -1.622e-01, 1.141e-01));
	r += mul(s3_2, M4(1.345e-01, 6.934e-02, 6.283e-02, 1.327e-01, 6.862e-02, -2.143e-01, 8.570e-02, -3.989e-02, -4.377e-02, -1.398e-01, 2.988e-01, 1.374e-01, 1.468e-01, 8.299e-02, -4.532e-02, 2.113e-01));
	r += mul(s3_3, M4(3.431e-01, -9.639e-03, 2.325e-01, -5.670e-02, -1.314e-01, 3.757e-02, -6.452e-02, 6.469e-02, -5.444e-03, -1.508e-01, -4.739e-01, -1.679e-01, 1.373e-01, -6.124e-02, 7.933e-03, -1.489e-02));
	r += mul(s3_4, M4(-1.132e-01, 5.289e-02, 1.535e-02, -2.451e-01, 3.783e-03, 1.595e-01, 9.528e-02, 1.430e-01, 1.694e-01, -1.169e-01, -2.842e-01, -2.346e-01, 3.864e-01, -1.825e-01, -2.749e-01, -1.994e-01));
	r += mul(s3_5, M4(-7.642e-03, 8.555e-02, -1.200e-02, 1.146e-01, 1.802e-02, 1.937e-01, 1.797e-01, 5.593e-03, 4.507e-02, -1.199e-01, -2.198e-01, 1.812e-01, -5.173e-03, 1.568e-01, 9.984e-02, -2.659e-02));
	r += mul(s3_6, M4(6.704e-02, 4.698e-02, -1.374e-02, -9.021e-02, -1.707e-01, 4.764e-03, -9.841e-03, 5.072e-02, 1.605e-01, -2.053e-01, -3.838e-02, -1.444e-02, 2.384e-02, -1.151e-01, -1.005e-01, 4.233e-02));
	r += mul(s3_7, M4(-8.390e-02, 1.908e-02, -1.100e-01, -8.602e-02, 4.900e-02, 1.116e-02, 2.053e-01, -5.726e-02, -9.621e-02, -1.169e-01, 2.024e-01, 4.824e-02, -2.096e-02, 3.369e-02, 2.029e-01, -1.462e-01));
	r += mul(s3_8, M4(1.567e-01, -2.444e-02, 9.509e-02, -1.816e-02, -3.936e-02, -2.473e-02, -1.129e-01, 2.242e-02, -3.106e-03, -1.018e-01, -1.701e-01, 1.499e-01, -1.430e-01, -3.654e-02, 2.967e-02, 8.369e-02));
	r += mul(s4_0, M4(1.034e-01, -5.312e-04, -1.239e-01, -5.456e-02, 1.879e-02, -7.676e-02, -5.956e-02, -1.783e-01, -1.130e-01, -6.370e-02, 2.026e-01, 3.057e-02, -2.113e-01, 5.926e-02, -5.394e-02, 1.535e-01));
	r += mul(s4_1, M4(-6.531e-03, -1.431e-01, -1.812e-01, 1.802e-01, 8.262e-03, 7.741e-02, -6.223e-02, 9.611e-03, -9.345e-02, -8.801e-02, 3.407e-02, 1.188e-01, 1.595e-01, -4.748e-02, 1.459e-02, 2.855e-01));
	r += mul(s4_2, M4(1.810e-01, -4.054e-02, -3.277e-02, -1.111e-01, 6.464e-02, -2.245e-01, 1.261e-02, -2.225e-01, 1.451e-01, 1.198e-01, 2.836e-02, 3.377e-02, 1.767e-01, -4.248e-02, 8.389e-02, 2.485e-02));
	r += mul(s4_3, M4(-9.627e-02, 5.986e-02, 7.569e-03, -7.105e-02, 2.928e-01, -1.040e-01, -2.868e-01, -1.463e-01, -8.511e-03, -1.502e-02, -7.755e-02, 1.688e-01, 6.127e-02, 8.810e-02, 3.685e-02, 2.663e-02));
	r += mul(s4_4, M4(-6.368e-02, 5.359e-02, -3.643e-02, 1.399e-01, 3.936e-03, 2.547e-01, 2.957e-01, 2.191e-01, 2.612e-01, 3.846e-02, 3.046e-02, -7.856e-02, -3.086e-02, -6.937e-02, 5.934e-02, 7.798e-02));
	r += mul(s4_5, M4(-1.666e-01, 4.368e-02, -9.936e-02, 5.513e-02, 2.653e-01, -7.287e-02, -2.554e-01, 2.655e-01, -1.452e-01, 5.002e-02, -6.006e-02, 3.174e-02, -5.631e-03, 7.299e-02, 1.471e-01, 6.928e-03));
	r += mul(s4_6, M4(7.689e-02, -6.132e-02, -9.584e-02, 5.564e-02, 9.063e-02, 1.710e-01, -6.893e-02, -1.494e-01, -1.700e-01, -5.163e-02, -7.758e-02, -4.896e-02, -1.714e-02, 3.621e-02, 8.242e-02, 2.643e-02));
	r += mul(s4_7, M4(1.566e-02, -1.998e-02, -1.708e-01, 2.358e-01, 2.403e-02, 2.645e-01, 4.161e-01, -1.649e-01, 2.306e-01, -3.243e-02, 9.435e-02, -1.145e-01, 1.212e-01, 7.266e-02, 1.598e-01, -7.062e-02));
	r += mul(s4_8, M4(3.270e-02, -1.166e-01, -3.694e-02, 2.545e-01, -1.235e-01, 1.676e-01, 3.960e-03, -1.373e-01, -5.666e-03, -1.110e-01, -9.305e-03, 6.505e-02, -7.349e-03, -2.651e-02, 7.352e-02, -6.788e-03));
	r += mul(s5_0, M4(8.014e-02, 1.658e-02, -5.292e-02, 1.630e-02, -7.409e-02, 1.580e-02, 5.897e-02, -1.103e-02, 8.100e-02, -1.677e-01, -2.744e-01, 8.982e-02, 4.863e-02, -7.768e-02, 2.005e-01, -8.596e-03));
	r += mul(s5_1, M4(-1.216e-01, -7.057e-02, 4.285e-02, 6.371e-02, 5.647e-02, 6.204e-02, 4.051e-03, 1.280e-02, -5.409e-02, 2.483e-02, 4.165e-02, -1.142e-01, 2.118e-01, -2.997e-01, -9.129e-02, 3.454e-01));
	r += mul(s5_2, M4(5.001e-02, 8.768e-02, -7.564e-02, 7.259e-02, -2.339e-02, -4.598e-02, 3.337e-02, 6.158e-02, 2.909e-01, -1.490e-01, 1.540e-01, 6.356e-02, -6.790e-03, 3.766e-02, 2.135e-02, 1.071e-01));
	r += mul(s5_3, M4(3.641e-02, 7.143e-02, -1.138e-01, -2.032e-02, -1.992e-02, -7.735e-02, -5.734e-02, -4.763e-02, -8.787e-03, 3.215e-02, 3.169e-02, 6.289e-02, 3.540e-01, -7.365e-02, -8.671e-02, 9.639e-02));
	r += mul(s5_4, M4(1.139e-01, -1.842e-01, 2.704e-01, -1.627e-01, -4.522e-02, -3.535e-02, 9.351e-02, -2.893e-02, 1.861e-01, 2.077e-01, 1.372e-02, 4.970e-02, 1.317e-01, -3.054e-01, -8.803e-02, 1.714e-01));
	r += mul(s5_5, M4(2.900e-02, 3.563e-03, 1.104e-01, -2.544e-01, 1.542e-02, -6.940e-02, 3.600e-03, 5.139e-02, -3.194e-01, 2.563e-01, 2.459e-02, -3.874e-02, -6.024e-02, 2.414e-02, -1.001e-01, -1.498e-01));
	r += mul(s5_6, M4(-4.244e-02, -5.413e-02, -9.869e-02, 2.158e-02, 1.469e-02, 7.325e-02, 1.743e-01, 1.407e-02, -3.738e-01, -2.959e-02, 3.194e-02, -1.635e-01, 1.413e-01, -4.843e-02, -1.413e-03, -3.039e-02));
	r += mul(s5_7, M4(1.139e-01, -6.672e-03, 2.730e-01, -3.003e-02, 2.715e-02, -3.701e-02, 2.281e-01, -1.225e-01, 2.238e-01, -8.357e-02, -6.010e-03, -2.614e-01, 7.331e-02, -1.523e-02, -2.548e-01, -8.554e-02));
	r += mul(s5_8, M4(3.213e-02, 1.268e-01, 3.449e-02, -2.570e-01, -1.225e-01, 7.178e-02, 9.433e-02, 2.491e-02, 1.620e-01, 1.357e-02, -2.706e-02, 6.437e-03, 1.109e-01, -7.013e-02, -6.792e-02, -1.037e-01));
	r += mul(s6_0, M4(8.426e-02, 1.621e-01, -3.307e-02, 6.085e-02, -2.910e-01, 1.052e-01, -1.113e-01, 1.969e-01, -1.820e-02, -4.381e-02, 5.765e-02, 8.801e-03, -1.752e-01, -4.596e-02, 2.692e-03, -1.809e-02));
	r += mul(s6_1, M4(5.368e-02, 8.830e-03, 1.342e-01, -1.547e-01, 1.712e-01, -2.743e-02, 2.409e-02, 2.343e-01, 2.959e-01, -6.927e-02, 1.156e-01, -2.541e-02, 1.364e-01, 1.695e-01, 1.817e-02, 5.368e-02));
	r += mul(s6_2, M4(5.028e-02, 1.692e-01, 2.213e-02, -7.803e-02, -1.072e-01, 4.685e-02, -5.682e-03, 5.716e-02, 1.669e-01, 8.173e-03, 1.611e-01, 1.414e-01, -1.356e-01, -2.232e-01, -8.235e-02, 1.440e-01));
	r += mul(s6_3, M4(-5.098e-02, 1.137e-01, -9.407e-02, -6.296e-02, -1.385e-01, 1.617e-01, -1.909e-01, 1.068e-01, -6.835e-03, -5.320e-02, -2.809e-01, 4.434e-02, -1.940e-01, 1.189e-01, 6.201e-03, -1.800e-02));
	r += mul(s6_4, M4(-6.221e-02, 5.196e-02, -2.060e-02, 5.637e-02, 3.642e-01, 5.141e-02, 2.882e-02, -2.343e-01, 1.591e-01, -1.559e-02, -1.596e-01, 2.177e-02, 7.673e-02, 3.992e-01, -1.146e-01, 2.448e-01));
	r += mul(s6_5, M4(-5.695e-02, 7.100e-02, -2.178e-02, 1.582e-01, 2.372e-02, -2.224e-02, -4.140e-02, -8.826e-02, -4.937e-02, -1.031e-01, -1.706e-01, -4.947e-02, -1.536e-01, 1.721e-01, 6.676e-03, 8.255e-02));
	r += mul(s6_6, M4(-9.010e-02, 1.503e-01, 7.492e-02, -1.993e-03, -3.702e-01, 1.511e-01, -3.409e-02, -2.098e-02, -4.503e-01, -6.997e-02, -1.055e-01, -2.102e-02, -7.406e-02, 2.809e-02, 3.084e-01, 8.653e-02));
	r += mul(s6_7, M4(-1.697e-01, 1.711e-01, 1.144e-01, 2.860e-02, 2.195e-02, 1.660e-03, -7.057e-02, -1.680e-01, -1.430e-01, -3.346e-02, -5.121e-02, 2.135e-02, 1.318e-01, -1.161e-02, 1.916e-01, 1.212e-01));
	r += mul(s6_8, M4(-3.071e-02, 1.432e-01, 1.052e-01, 7.180e-02, -4.928e-02, 5.232e-02, -3.310e-03, -3.017e-05, 1.398e-01, -7.870e-02, -9.837e-02, 2.374e-02, 1.507e-01, -8.493e-02, -5.154e-02, 5.780e-02));
	r += mul(s7_0, M4(2.455e-01, -7.282e-02, 7.852e-02, 4.070e-02, -3.297e-02, 4.145e-02, -1.426e-01, 7.745e-02, -3.729e-02, 8.566e-03, 1.435e-01, -1.010e-01, 6.033e-02, -8.686e-03, -7.574e-02, -3.943e-02));
	r += mul(s7_1, M4(-5.348e-02, -1.035e-01, 1.114e-01, -1.529e-01, -6.293e-03, -3.968e-02, -6.971e-02, 1.515e-01, 7.013e-02, 1.038e-01, -3.970e-02, -1.193e-01, 1.365e-01, 6.677e-02, 2.004e-02, -1.010e-02));
	r += mul(s7_2, M4(1.277e-02, -3.852e-02, -6.031e-02, -1.067e-02, -1.349e-02, -5.066e-02, -9.634e-02, 1.096e-01, -4.221e-02, -2.845e-02, -6.615e-02, -1.064e-01, -5.977e-02, -1.288e-01, -1.142e-01, 3.774e-02));
	r += mul(s7_3, M4(1.835e-01, -1.255e-01, 6.001e-02, -9.365e-02, -8.400e-02, -2.954e-02, -1.218e-01, 2.082e-02, 9.881e-03, -6.183e-02, 7.922e-02, 4.288e-02, -8.498e-02, -7.827e-02, -1.268e-02, 5.906e-06));
	r += mul(s7_4, M4(-1.167e-02, -1.976e-01, 1.218e-01, 2.846e-02, 1.403e-01, -1.914e-02, -3.142e-02, -6.645e-03, 1.120e-01, 4.741e-02, -4.679e-02, 1.165e-01, 6.907e-02, 9.039e-02, -1.763e-01, 6.242e-02));
	r += mul(s7_5, M4(-6.373e-02, -4.400e-02, -3.731e-02, -3.382e-02, 7.034e-02, -4.918e-02, 8.839e-03, -4.738e-02, -1.555e-02, -2.111e-02, -1.032e-02, 7.233e-03, 8.502e-02, 9.928e-02, -1.683e-02, 1.288e-01));
	r += mul(s7_6, M4(1.680e-01, 1.016e-02, 6.608e-02, -4.082e-03, 3.294e-02, 4.425e-02, -1.113e-01, -6.943e-02, -3.064e-02, -1.889e-02, 9.421e-02, 2.824e-02, 6.753e-02, -7.434e-02, -5.314e-02, -2.174e-02));
	r += mul(s7_7, M4(-3.561e-02, -1.050e-01, -1.267e-01, -7.304e-02, 9.945e-02, -1.332e-01, -4.349e-02, -7.886e-02, -7.833e-02, 1.614e-01, 7.414e-02, 2.130e-03, 1.372e-01, 1.233e-02, -1.111e-01, 3.728e-02));
	r += mul(s7_8, M4(-1.044e-02, -5.582e-02, 1.359e-02, -1.386e-02, -3.630e-02, -1.954e-02, -1.212e-01, 1.234e-02, -2.190e-02, 1.427e-02, -3.315e-02, 1.844e-02, -1.252e-01, -5.419e-02, -3.355e-02, 2.707e-03));
	// Constant per-component offset added after all products have been summed.
	r += V4(3.455e-02, -5.682e-02, 3.028e-02, 8.647e-03);
	return r;
}

V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) {
	V4 r = 0.0;
	r += mul(s0_0, M4(-1.096e-01, -1.852e-02, -1.637e-02, 4.392e-02, -2.820e-02, 2.433e-02, -1.942e-01, -1.233e-01, 1.259e-01, -9.204e-02, 2.761e-02, 2.025e-02, -5.680e-02, 2.470e-02, 1.573e-01, 1.047e-01));
	r += mul(s0_1, M4(2.607e-01, -2.214e-02, 3.786e-02, 7.953e-02, 1.091e+00, -3.300e-01, -5.729e-02, 1.207e-01, -7.803e-02, -1.589e-01, 1.505e-02, -2.123e-02, 1.959e-01, 5.892e-02, -1.357e-02, 1.410e-01));
	r += mul(s0_2, M4(-5.059e-02, -7.549e-02, 4.878e-03, 7.983e-02, -1.802e-01, 1.381e-01, -7.327e-02, -1.975e-01, 1.452e-02, 1.400e-01, -1.519e-01, 1.185e-01, 2.760e-02, -8.449e-02, 3.833e-02, 1.663e-02));
	r += mul(s0_3, M4(-4.666e-02, -3.460e-04, 1.701e-02, -8.272e-02, -1.174e-01, -1.036e-01, 1.497e-03, 2.781e-01, 9.424e-02, 2.704e-02, -3.550e-02, -1.404e-01, 1.897e-01, 1.363e-01, 1.109e-01, 3.753e-02));
	r += mul(s0_4, M4(-1.341e-01, 3.821e-02, -3.010e-02, -1.671e-01, -1.511e-01, -6.140e-01, -1.095e-01, -3.052e-01, 2.433e-01, -3.002e-02, -2.312e-01, -8.997e-04, 1.391e-02, 1.397e-01, 1.878e-01, -2.238e-02));
	r += mul(s0_5, M4(1.033e-01, -8.012e-03, -6.417e-02, 9.283e-02, -1.379e-01, -4.926e-02, -2.075e-01, -1.917e-01, 4.786e-02, 2.361e-02, -5.584e-02, 9.578e-02, -5.804e-02, -4.844e-03, -1.844e-02, -1.057e-02));
	r += mul(s0_6, M4(-2.536e-02, -5.752e-02, 2.363e-02, 1.713e-01, -5.923e-01, 3.168e-01, 1.674e-01, 1.652e-01, -6.523e-02, -3.655e-03, -5.167e-02, 1.683e-02, -1.840e-01, 5.627e-02, -1.028e-01, 1.927e-01));
	r += mul(s0_7, M4(-1.704e-01, 1.462e-02, 3.242e-02, 7.732e-02, -6.424e-01, 1.905e-01, -8.243e-02, 7.533e-02, 2.171e-01, 6.840e-02, -9.205e-02, -6.206e-03, 5.733e-02, -1.789e-02, -9.110e-02, 5.664e-02));
	r += mul(s0_8, M4(-5.938e-03, -7.444e-02, 5.562e-02, -6.044e-02, -3.737e-01, 1.747e-01, 1.633e-01, 1.036e-01, 1.738e-02, -2.388e-02, -1.659e-01, -8.670e-02, 7.200e-02, -1.353e-01, -4.155e-04, -8.666e-02));
	r += mul(s1_0, M4(1.040e-01, 4.584e-04, 7.114e-02, -1.014e-01, -4.784e-02, 8.807e-02, -2.202e-02, -6.059e-02, -1.686e-02, -6.356e-02, 1.230e-01, 1.079e-01, -1.823e-02, 2.432e-01, -1.581e-01, -2.449e-01));
	r += mul(s1_1, M4(3.786e-02, -1.782e-01, -4.362e-02, 8.725e-02, 1.882e-03, -5.855e-02, 1.016e-01, 8.599e-02, -1.231e-01, 5.488e-02, 5.245e-02, -8.730e-02, -3.452e-01, 1.340e-01, -2.548e-02, -2.157e-01));
	r += mul(s1_2, M4(-1.819e-03, -1.625e-01, 2.959e-02, 1.547e-02, -6.048e-03, 1.479e-01, -1.464e-01, 7.402e-02, -1.513e-01, 3.002e-02, 6.133e-02, 5.366e-02, -1.184e-01, 3.590e-01, -1.053e-01, -1.425e-01));
	r += mul(s1_3, M4(1.404e-01, -9.241e-02, 1.825e-01, 2.719e-04, -5.835e-02, 6.595e-03, -9.739e-03, -1.057e-01, 1.661e-01, -1.151e-02, 6.201e-02, 1.203e-02, 3.053e-01, -8.240e-03, 6.581e-02, -1.970e-02));
	r += mul(s1_4, M4(-2.797e-01, 2.141e-02, 1.896e-01, 3.946e-02, -1.040e-01, -2.634e-01, 1.826e-01, -2.420e-01, -2.634e-01, -1.385e-01, 1.424e-01, -1.114e-01, 1.808e-01, -1.194e-01, 9.477e-02, -2.438e-01));
	r += mul(s1_5, M4(3.524e-01, -2.425e-02, -8.163e-02, 1.418e-02, 1.681e-01, 1.876e-02, -1.001e-01, 5.731e-02, -7.652e-02, -2.750e-02, 2.377e-01, -1.445e-01, -1.499e-01, -6.638e-02, 2.818e-02, -2.138e-01));
	r += mul(s1_6, M4(-3.080e-02, 1.693e-02, 5.671e-03, 1.528e-01, 5.871e-02, -2.643e-02, -5.709e-02, 4.359e-02, -2.522e-02, 1.197e-01, -9.253e-02, 4.255e-02, 1.676e-01, 2.026e-01, -2.956e-01, -2.714e-01));
	r += mul(s1_7, M4(-1.606e-01, 6.631e-02, -1.906e-01, -2.287e-01, -6.485e-02, -7.836e-02, 4.530e-02, 5.725e-02, 6.908e-02, 1.061e-01, 1.805e-01, 7.356e-02, 2.819e-01, 1.429e-01, -1.723e-01, -3.174e-02));
	r += mul(s1_8, M4(-8.581e-02, -2.105e-02, -5.922e-03, -3.853e-02, 4.403e-02, 7.008e-02, 2.021e-02, 8.475e-02, -4.575e-02, -1.065e-01, 5.490e-02, 8.552e-02, 1.052e-01, -5.821e-02, -3.439e-02, -1.344e-02));
	r += mul(s2_0, M4(-9.902e-02, -3.794e-02, 1.320e-01, 1.776e-02, -7.469e-02, -1.189e-02, -7.807e-02, -7.543e-02, 4.629e-02, 3.148e-02, -1.269e-01, -1.108e-01, -6.617e-02, 5.129e-02, -1.180e-01, 2.706e-03));
	r += mul(s2_1, M4(9.910e-02, -1.586e-01, 8.186e-02, 5.193e-01, 2.276e-01, 2.274e-02, -1.020e-01, -1.754e-03, 1.400e-01, -8.362e-02, 3.942e-02, 5.102e-02, -8.412e-02, 6.594e-02, -2.943e-02, -6.063e-02));
	r += mul(s2_2, M4(1.368e-02, 4.901e-02, -1.587e-01, -3.238e-02, 4.454e-02, -1.343e-01, -1.012e-01, -2.655e-01, -1.020e-01, 1.676e-01, 1.253e-01, -9.942e-02, 3.205e-02, -5.055e-02, -1.117e-01, -1.089e-01));
	r += mul(s2_3, M4(-5.653e-03, -1.945e-02, 1.854e-01, 7.696e-02, 1.377e-02, -1.351e-01, -2.388e-01, -1.644e-01, 1.011e-02, -5.458e-02, -7.235e-02, 2.097e-02, -1.990e-02, 1.203e-02, 9.880e-02, -1.743e-01));
	r += mul(s2_4, M4(-8.908e-02, 7.718e-03, 6.576e-02, 1.137e-03, 7.829e-02, 2.680e-01, -1.524e-01, -1.810e-01, -6.039e-02, 1.009e-02, -7.063e-02, 2.310e-01, -2.906e-01, -6.006e-02, 1.890e-01, -1.672e-01));
	r += mul(s2_5, M4(-2.529e-03, 8.173e-02, 3.242e-02, -5.432e-02, 3.282e-02, -1.262e-01, 1.755e-01, 9.609e-03, -7.836e-02, 9.272e-02, 3.969e-02, 1.379e-01, -1.428e-01, -2.265e-02, -1.399e-01, 9.609e-02));
	r += mul(s2_6, M4(8.486e-02, -3.489e-02, 7.254e-02, 2.976e-02, 4.917e-02, -7.064e-02, -7.310e-02, -1.542e-01, -1.208e-01, -1.891e-02, -2.144e-02, -1.589e-02, -3.039e-04, 1.014e-01, -1.183e-02, -1.660e-02));
	r += mul(s2_7, M4(1.340e-01, 1.040e-01, -7.289e-02, -3.622e-02, -4.446e-03, -6.201e-02, 1.615e-01, 1.755e-01, 7.300e-02, 8.229e-02, -8.150e-02, 4.328e-02, -5.828e-02, -5.730e-03, -1.219e-01, -1.250e-01));
	r += mul(s2_8, M4(-8.284e-02, -1.916e-02, 1.192e-01, -1.383e-01, 2.001e-02, -8.057e-03, 8.148e-02, -8.773e-04, 2.151e-02, 1.743e-04, -6.163e-04, -4.874e-02, 1.019e-01, -1.012e-02, -1.159e-02, -3.961e-02));
	r += mul(s3_0, M4(-9.251e-03, 1.791e-01, 6.008e-02, -1.205e-01, -1.015e-02, 7.059e-02, -9.721e-02, -1.524e-02, -1.661e-01, 1.372e-01, 4.558e-02, -1.628e-01, -1.264e-01, -3.679e-02, -3.447e-02, 2.277e-02));
	r += mul(s3_1, M4(-1.366e-01, 2.667e-01, -4.057e-02, 1.793e-02, -1.248e-01, 1.381e-01, 1.214e-01, 5.758e-02, 9.372e-02, -1.240e-01, 3.346e-01, -7.787e-02, 6.877e-02, -3.500e-01, 1.308e-01, 3.518e-02));
	r += mul(s3_2, M4(-3.532e-03, -1.039e-01, -1.171e-01, 6.105e-02, 2.353e-01, -1.094e-01, 1.156e-01, -2.502e-03, -2.945e-01, 1.159e-01, 2.745e-01, 7.918e-02, 6.662e-03, -6.117e-02, 3.318e-02, 6.362e-03));
	r += mul(s3_3, M4(5.691e-02, -8.692e-02, -8.899e-02, 1.076e-01, 8.120e-03, 1.426e-02, -1.447e-01, -3.731e-02, 7.444e-02, -1.054e-01, 1.010e-01, 1.529e-01, 4.061e-02, 1.064e-01, -1.352e-01, -1.880e-01));
	r += mul(s3_4, M4(-1.729e-01, 2.776e-02, -7.887e-03, -2.290e-01, -2.224e-01, 2.358e-03, -7.686e-02, -2.962e-02, 2.791e-01, -3.004e-01, 2.505e-02, -2.687e-01, 1.216e-01, 9.880e-02, 7.356e-02, -6.637e-02));
	r += mul(s3_5, M4(1.795e-01, -3.148e-02, -2.065e-01, -2.491e-01, -1.342e-01, -1.418e-01, 1.059e-01, -1.083e-01, -2.623e-02, 1.010e-01, 1.061e-01, 2.263e-01, 1.283e-02, 2.022e-01, 2.694e-02, 1.391e-01));
	r += mul(s3_6, M4(-2.450e-02, 1.307e-02, -8.529e-03, 3.717e-02, 4.865e-02, 1.302e-01, 4.016e-02, -4.964e-02, -1.244e-01, -5.523e-02, 1.237e-02, -4.402e-02, -1.660e-02, 4.089e-02, -9.886e-02, -9.060e-03));
	r += mul(s3_7, M4(-8.691e-02, -5.331e-02, 1.517e-01, 1.034e-01, -9.627e-02, 3.671e-02, 7.032e-02, 2.741e-02, 1.266e-01, 1.678e-02, -7.681e-02, -7.390e-02, -5.747e-02, 2.561e-01, -1.791e-02, 4.539e-02));
	r += mul(s3_8, M4(-7.839e-02, 6.835e-03, -3.815e-02, -6.703e-02, 1.276e-02, -4.495e-02, -8.536e-02, -5.965e-02, 7.876e-02, -7.166e-02, 1.930e-01, 1.739e-01, 2.574e-02, -6.235e-02, 1.674e-01, -2.943e-02));
	r += mul(s4_0, M4(3.382e-02, -5.765e-02, -1.294e-01, -1.117e-01, 7.191e-02, -1.291e-01, -1.010e-01, -6.787e-02, -9.919e-02, -1.310e-02, -5.468e-02, -8.673e-02, -4.614e-02, 2.058e-02, 1.568e-02, 6.677e-02));
	r += mul(s4_1, M4(9.792e-02, -1.954e-01, -1.295e-01, -9.406e-02, 7.871e-03, 2.065e-02, -1.771e-01, -1.587e-01, 1.736e-02, -7.412e-02, -1.920e-02, 6.103e-02, -1.134e-01, -7.384e-02, 2.419e-01, -2.548e-01));
	r += mul(s4_2, M4(-1.086e-01, -6.933e-02, -2.300e-02, -4.260e-02, 7.342e-02, 1.651e-01, -2.871e-01, -1.338e-01, 2.483e-02, 1.475e-02, 2.509e-02, 8.418e-02, 9.221e-02, 8.517e-02, -3.979e-02, 1.869e-02));
	r += mul(s4_3, M4(-1.124e-01, 1.399e-01, -2.047e-01, -1.929e-01, 6.006e-02, 3.435e-02, 2.235e-01, 4.982e-02, -1.128e-01, -2.094e-02, 3.461e-01, -9.115e-04, -7.437e-02, -2.786e-03, 9.743e-02, 1.126e-01));
	r += mul(s4_4, M4(-2.532e-01, -1.764e-02, -7.388e-02, 2.223e-01, 3.639e-01, 1.871e-01, 7.373e-02, 9.498e-02, 1.290e-01, 1.640e-02, 3.807e-02, 1.783e-01, -6.746e-02, 2.492e-02, -2.109e-01, -7.012e-02));
	r += mul(s4_5, M4(-2.031e-01, -1.759e-01, -4.291e-02, -1.635e-01, 1.516e-01, 1.822e-01, -8.788e-02, -9.115e-02, -3.340e-02, 1.875e-02, -1.274e-01, -1.669e-01, -4.712e-02, 2.403e-02, 2.677e-02, -9.679e-02));
	r += mul(s4_6, M4(-1.202e-01, -1.172e-01, 1.657e-01, 1.462e-01, 1.135e-01, -2.146e-02, 1.613e-01, -8.259e-02, -3.707e-02, 1.363e-01, 1.371e-01, -8.764e-02, 7.924e-02, 3.371e-02, 7.389e-02, 2.623e-02));
	r += mul(s4_7, M4(-4.097e-03, -6.457e-02, 3.019e-01, 9.493e-02, -1.907e-01, 2.051e-01, 1.891e-01, 1.184e-01, -6.448e-02, -3.581e-02, -9.772e-03, -1.005e-02, -9.187e-02, -5.031e-02, -7.213e-02, 4.266e-02));
	r += mul(s4_8, M4(4.281e-02, -5.041e-02, 1.101e-01, 9.184e-02, -2.280e-01, 6.764e-03, -1.061e-01, -2.754e-01, 2.351e-02, -4.372e-02, -4.200e-02, 1.559e-02, -1.357e-02, 3.516e-02, 4.607e-02, -1.332e-02));
	r += mul(s5_0, M4(-7.007e-02, 1.679e-01, -1.274e-02, 7.591e-02, 1.389e-02, 1.539e-02, -9.365e-02, 8.885e-02, -7.271e-02, 4.573e-02, 1.978e-01, 3.180e-02, 3.804e-02, -1.208e-01, -1.929e-01, 1.365e-01));
	r += mul(s5_1, M4(-3.422e-03, 9.937e-02, 2.542e-02, 1.253e-01, -7.473e-02, -1.020e-03, -1.301e-02, 5.780e-02, 1.960e-01, -7.564e-02, 8.091e-02, 4.205e-02, -3.428e-01, -1.536e-01, -1.327e-01, -1.579e-01));
	r += mul(s5_2, M4(9.969e-02, -1.642e-01, -5.405e-02, 1.528e-01, 3.682e-02, -1.369e-02, 1.060e-01, -5.419e-02, 9.039e-03, -3.165e-02, -8.450e-02, -8.017e-02, -8.518e-02, 2.551e-02, -1.087e-01, 1.125e-01));
	r += mul(s5_3, M4(-1.152e-01, 7.566e-02, 7.127e-02, -4.112e-02, 3.422e-02, -2.033e-01, 7.094e-02, -3.279e-03, 1.251e-01, -1.479e-01, 3.210e-01, 2.175e-02, 1.728e-01, -5.799e-02, -7.935e-03, 9.648e-02));
	r += mul(s5_4, M4(2.459e-01, 2.175e-01, 2.886e-02, -6.984e-02, 1.226e-01, 7.211e-03, 1.837e-02, 9.922e-02, 4.560e-01, -1.017e-01, 2.737e-02, 6.745e-02, 1.495e-03, 1.674e-01, -1.435e-01, -4.169e-02));
	r += mul(s5_5, M4(-7.349e-02, -5.326e-02, 1.862e-01, -1.118e-01, -6.708e-02, -1.111e-01, -3.200e-02, -1.652e-01, -3.148e-03, -4.066e-02, -7.074e-02, -5.107e-02, -3.497e-02, -1.160e-01, -1.724e-01, 5.532e-03));
	r += mul(s5_6, M4(4.559e-02, 8.109e-02, 8.744e-02, 8.321e-02, -8.380e-03, 8.471e-02, -9.455e-02, -8.131e-02, -2.126e-01, 1.962e-01, -1.948e-02, 7.930e-02, 1.526e-01, -1.633e-01, -6.995e-02, 8.871e-02));
	r += mul(s5_7, M4(-6.974e-02, 5.436e-02, 3.158e-02, 9.851e-02, -1.683e-02, 1.089e-01, -3.526e-02, -8.762e-02, -2.212e-01, -4.418e-02, -1.515e-01, 6.581e-02, 2.384e-01, -1.836e-02, -6.843e-02, -7.604e-02));
	r += mul(s5_8, M4(4.627e-03, 5.753e-02, -6.744e-02, -2.015e-01, -4.092e-02, 6.422e-02, 1.893e-02, -2.088e-01, -9.010e-02, 2.032e-02, -6.638e-02, -3.300e-03, 8.718e-02, -2.528e-02, 2.858e-02, 1.160e-01));
	r += mul(s6_0, M4(6.870e-02, 1.131e-02, 7.865e-02, -3.451e-02, 1.591e-02, -1.381e-01, -9.905e-02, -2.441e-01, -1.138e-01, 2.561e-01, -2.368e-02, -1.296e-01, 1.939e-01, -2.497e-01, -4.962e-02, -1.540e-01));
	r += mul(s6_1, M4(1.207e-01, 1.171e-01, -9.254e-02, -8.790e-03, -5.541e-02, -1.699e-01, -2.360e-02, -1.511e-01, 4.065e-02, -1.579e-01, -1.230e-01, 3.388e-02, -7.535e-02, -2.809e-04, -1.092e-01, 8.226e-02));
	r += mul(s6_2, M4(1.462e-01, 1.123e-02, -1.396e-01, 3.841e-02, -1.342e-01, 1.159e-01, 7.373e-03, -9.726e-02, 1.521e-01, 2.392e-01, -3.398e-02, -7.629e-02, 3.649e-02, 1.545e-01, 1.291e-01, -1.353e-01));
	r += mul(s6_3, M4(1.193e-01, 6.714e-02, -5.676e-02, -6.763e-02, 2.939e-01, -4.573e-02, -2.876e-02, 7.910e-02, 2.628e-01, -3.672e-02, 5.333e-02, -1.391e-01, -2.088e-02, 3.584e-01, 3.513e-02, 3.537e-02));
	r += mul(s6_4, M4(-3.668e-02, -4.670e-02, 7.637e-02, -3.769e-02, 3.771e-02, 1.010e-01, -2.853e-01, 1.099e-01, -1.628e-02, -2.058e-01, -5.978e-02, 2.668e-01, -2.075e-01, -2.548e-01, 2.696e-01, 1.503e-01));
	r += mul(s6_5, M4(-9.871e-02, 1.987e-01, -5.740e-02, 6.089e-02, -1.584e-01, 8.837e-02, -3.021e-02, 1.039e-01, -9.652e-02, -1.423e-01, -4.528e-02, 3.550e-02, 8.129e-02, 7.949e-02, -3.520e-02, 1.582e-01));
	r += mul(s6_6, M4(-9.904e-03, 5.141e-02, -4.732e-03, 2.337e-02, -3.112e-03, 1.407e-01, 6.291e-02, 5.261e-02, 1.129e-02, 1.724e-01, 1.948e-01, 1.209e-01, -3.328e-02, 6.157e-02, -1.265e-01, -1.565e-01));
	r += mul(s6_7, M4(1.120e-01, 1.573e-01, 7.552e-02, -1.189e-01, -1.018e-01, -5.837e-03, 1.956e-01, -1.450e-01, -1.985e-01, -2.056e-01, 1.990e-01, 2.739e-02, -7.748e-02, 2.858e-02, -1.676e-01, -2.205e-01));
	r += mul(s6_8, M4(3.343e-02, -1.598e-02, 8.243e-02, -1.057e-01, -9.837e-02, -1.382e-01, 4.827e-02, -2.322e-02, -6.006e-02, -1.225e-02, -8.345e-03, 3.788e-02, 7.137e-02, -8.822e-02, -1.240e-01, 1.587e-01));
	r += mul(s7_0, M4(4.197e-03, -9.150e-02, 2.243e-02, 1.510e-01, -4.178e-02, -4.519e-03, -2.065e-01, -1.866e-01, -6.665e-02, 6.482e-02, -3.272e-02, -1.524e-02, -5.689e-02, -8.562e-02, -1.224e-01, -5.034e-02));
	r += mul(s7_1, M4(-2.028e-01, 1.248e-02, -7.040e-02, 6.906e-02, -1.455e-01, -9.177e-02, -2.913e-02, -8.226e-02, 8.236e-02, 1.811e-01, -5.199e-02, 9.111e-02, 2.452e-01, -2.731e-03, -1.706e-02, 8.847e-02));
	r += mul(s7_2, M4(4.152e-02, -2.097e-02, -6.550e-02, 5.457e-02, -8.043e-02, 4.064e-02, 2.533e-02, 4.574e-02, -5.159e-02, -2.161e-02, -4.714e-02, -1.196e-01, 1.831e-02, -7.269e-02, -2.514e-02, -1.522e-01));
	r += mul(s7_3, M4(1.127e-01, -1.283e-01, -6.297e-02, 1.758e-02, 1.899e-01, 2.562e-02, -1.396e-01, -8.503e-02, -3.588e-02, 8.375e-02, -1.897e-01, 3.284e-02, -5.664e-02, 7.889e-02, -1.285e-01, -4.259e-02));
	r += mul(s7_4, M4(1.804e-01, -1.357e-01, -9.422e-02, 1.343e-01, 6.370e-02, -8.731e-02, -2.610e-01, 3.235e-02, 2.090e-02, 1.737e-02, 3.840e-02, 6.650e-02, -1.454e-01, -9.199e-02, 1.087e-01, -8.518e-02));
	r += mul(s7_5, M4(-7.470e-02, 1.550e-02, 5.159e-02, 1.604e-01, -2.909e-02, -3.405e-02, -4.281e-02, 1.604e-01, -7.784e-02, 9.256e-02, 1.461e-01, 1.052e-01, -6.975e-02, 2.712e-02, 9.854e-02, 6.271e-02));
	r += mul(s7_6, M4(5.877e-02, -1.074e-01, -1.386e-01, 5.543e-02, -6.297e-03, -4.513e-02, -5.606e-02, 8.423e-02, -9.024e-02, -8.228e-02, 5.863e-02, -1.366e-02, -6.564e-02, -4.470e-04, 1.867e-03, 1.007e-01));
	r += mul(s7_7, M4(5.408e-02, -1.122e-02, -1.600e-01, -2.141e-02, 2.848e-01, -3.632e-02, 5.869e-02, -1.663e-01, 1.365e-01, 8.206e-03, 6.789e-02, -3.498e-02, 3.298e-02, -9.073e-02, 4.929e-02, -3.582e-02));
	r += mul(s7_8, M4(6.673e-02, -8.083e-02, -5.151e-03, -4.743e-02, 3.548e-02, -1.058e-01, -6.822e-02, -2.533e-02, -2.161e-02, -8.336e-02, 1.863e-02, -4.424e-02, 8.914e-02, 1.755e-02, 6.385e-02, 3.120e-02));
	r += V4(-4.162e-03, -6.545e-02, 3.186e-02, -6.206e-02);
	return r;
}

// Pass 4 entry point: computes one pixel of each of the textures t4..t7.
//
// Parameters:
//   blockStart - added to the offset returned by Rmp8x8 to form the pixel
//                coordinate gxy handled by this thread.
//   tid        - thread id; only tid.x is used (as the argument of Rmp8x8).
//
// The tap macros l0..l3 and the functions f0..f3 used below are the ones
// defined for this pass earlier in the file (outside this excerpt).
// NOTE(review): the macros presumably expand through O(), which reads the
// locals `pos` and `pt` by name - do not rename those two without checking.
void Pass4(uint2 blockStart, uint3 tid) {
	// NOTE(review): presumably the size of one input texel in UV units - confirm against GetInputPt.
	float2 pt = float2(GetInputPt());
	uint2 gxy = Rmp8x8(tid.x) + blockStart;
	uint2 size = GetInputSize();
	// Threads that land outside the input size return before writing anything.
	if (gxy.x >= size.x || gxy.y >= size.y) {
		return;
	}
	// Centre of pixel gxy, scaled by pt.
	float2 pos = (gxy + 0.5) * pt;

	// 3x3 neighbourhood of l0. Suffix _0.._8 runs row by row from offset
	// (-1, -1) to (1, 1); _4 is the centre tap.
	V4 s0_0 = l0(-1.0, -1.0);
	V4 s0_1 = l0(0.0, -1.0);
	V4 s0_2 = l0(1.0, -1.0);
	V4 s0_3 = l0(-1.0, 0.0);
	V4 s0_4 = l0(0.0, 0.0);
	V4 s0_5 = l0(1.0, 0.0);
	V4 s0_6 = l0(-1.0, 1.0);
	V4 s0_7 = l0(0.0, 1.0);
	V4 s0_8 = l0(1.0, 1.0);
	// s1_k keeps only the non-positive part of tap k (components > 0 become zero).
	// This must be computed before s0_k is overwritten below.
	V4 s1_0 = -max(-s0_0, 0.0);
	V4 s1_1 = -max(-s0_1, 0.0);
	V4 s1_2 = -max(-s0_2, 0.0);
	V4 s1_3 = -max(-s0_3, 0.0);
	V4 s1_4 = -max(-s0_4, 0.0);
	V4 s1_5 = -max(-s0_5, 0.0);
	V4 s1_6 = -max(-s0_6, 0.0);
	V4 s1_7 = -max(-s0_7, 0.0);
	V4 s1_8 = -max(-s0_8, 0.0);
	// s0_k now keeps only the non-negative part of tap k.
	s0_0 = max(s0_0, 0.0);
	s0_1 = max(s0_1, 0.0);
	s0_2 = max(s0_2, 0.0);
	s0_3 = max(s0_3, 0.0);
	s0_4 = max(s0_4, 0.0);
	s0_5 = max(s0_5, 0.0);
	s0_6 = max(s0_6, 0.0);
	s0_7 = max(s0_7, 0.0);
	s0_8 = max(s0_8, 0.0);

	// Same sampling and split for l1: s2_* = non-negative part, s3_* = non-positive part.
	V4 s2_0 = l1(-1.0, -1.0);
	V4 s2_1 = l1(0.0, -1.0);
	V4 s2_2 = l1(1.0, -1.0);
	V4 s2_3 = l1(-1.0, 0.0);
	V4 s2_4 = l1(0.0, 0.0);
	V4 s2_5 = l1(1.0, 0.0);
	V4 s2_6 = l1(-1.0, 1.0);
	V4 s2_7 = l1(0.0, 1.0);
	V4 s2_8 = l1(1.0, 1.0);
	V4 s3_0 = -max(-s2_0, 0.0);
	V4 s3_1 = -max(-s2_1, 0.0);
	V4 s3_2 = -max(-s2_2, 0.0);
	V4 s3_3 = -max(-s2_3, 0.0);
	V4 s3_4 = -max(-s2_4, 0.0);
	V4 s3_5 = -max(-s2_5, 0.0);
	V4 s3_6 = -max(-s2_6, 0.0);
	V4 s3_7 = -max(-s2_7, 0.0);
	V4 s3_8 = -max(-s2_8, 0.0);
	s2_0 = max(s2_0, 0.0);
	s2_1 = max(s2_1, 0.0);
	s2_2 = max(s2_2, 0.0);
	s2_3 = max(s2_3, 0.0);
	s2_4 = max(s2_4, 0.0);
	s2_5 = max(s2_5, 0.0);
	s2_6 = max(s2_6, 0.0);
	s2_7 = max(s2_7, 0.0);
	s2_8 = max(s2_8, 0.0);

	// Same sampling and split for l2: s4_* = non-negative part, s5_* = non-positive part.
	V4 s4_0 = l2(-1.0, -1.0);
	V4 s4_1 = l2(0.0, -1.0);
	V4 s4_2 = l2(1.0, -1.0);
	V4 s4_3 = l2(-1.0, 0.0);
	V4 s4_4 = l2(0.0, 0.0);
	V4 s4_5 = l2(1.0, 0.0);
	V4 s4_6 = l2(-1.0, 1.0);
	V4 s4_7 = l2(0.0, 1.0);
	V4 s4_8 = l2(1.0, 1.0);
	V4 s5_0 = -max(-s4_0, 0.0);
	V4 s5_1 = -max(-s4_1, 0.0);
	V4 s5_2 = -max(-s4_2, 0.0);
	V4 s5_3 = -max(-s4_3, 0.0);
	V4 s5_4 = -max(-s4_4, 0.0);
	V4 s5_5 = -max(-s4_5, 0.0);
	V4 s5_6 = -max(-s4_6, 0.0);
	V4 s5_7 = -max(-s4_7, 0.0);
	V4 s5_8 = -max(-s4_8, 0.0);
	s4_0 = max(s4_0, 0.0);
	s4_1 = max(s4_1, 0.0);
	s4_2 = max(s4_2, 0.0);
	s4_3 = max(s4_3, 0.0);
	s4_4 = max(s4_4, 0.0);
	s4_5 = max(s4_5, 0.0);
	s4_6 = max(s4_6, 0.0);
	s4_7 = max(s4_7, 0.0);
	s4_8 = max(s4_8, 0.0);

	// Same sampling and split for l3: s6_* = non-negative part, s7_* = non-positive part.
	V4 s6_0 = l3(-1.0, -1.0);
	V4 s6_1 = l3(0.0, -1.0);
	V4 s6_2 = l3(1.0, -1.0);
	V4 s6_3 = l3(-1.0, 0.0);
	V4 s6_4 = l3(0.0, 0.0);
	V4 s6_5 = l3(1.0, 0.0);
	V4 s6_6 = l3(-1.0, 1.0);
	V4 s6_7 = l3(0.0, 1.0);
	V4 s6_8 = l3(1.0, 1.0);
	V4 s7_0 = -max(-s6_0, 0.0);
	V4 s7_1 = -max(-s6_1, 0.0);
	V4 s7_2 = -max(-s6_2, 0.0);
	V4 s7_3 = -max(-s6_3, 0.0);
	V4 s7_4 = -max(-s6_4, 0.0);
	V4 s7_5 = -max(-s6_5, 0.0);
	V4 s7_6 = -max(-s6_6, 0.0);
	V4 s7_7 = -max(-s6_7, 0.0);
	V4 s7_8 = -max(-s6_8, 0.0);
	s6_0 = max(s6_0, 0.0);
	s6_1 = max(s6_1, 0.0);
	s6_2 = max(s6_2, 0.0);
	s6_3 = max(s6_3, 0.0);
	s6_4 = max(s6_4, 0.0);
	s6_5 = max(s6_5, 0.0);
	s6_6 = max(s6_6, 0.0);
	s6_7 = max(s6_7, 0.0);
	s6_8 = max(s6_8, 0.0);

	// One function per output texture; all four receive the same 72 vectors
	// in the same order (s0_0..s0_8, s1_0..s1_8, ..., s7_0..s7_8).
	t4[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8);
	t5[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8);
	t6[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8);
	t7[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8);
}

//!PASS 5
//!DESC conv4
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN t4, t5, t6, t7
//!OUT t0, t1, t2, t3

// Tap macros for pass 5. lN(x, y) reads input texture N of this pass
// (t4..t7) through O(): SampleLevel with sampler SP at
// pos + float2(x, y) * pt, mip level 0, with the result converted to V4.
// O() picks up `pos` and `pt` by name, so both must be in scope wherever
// these macros are expanded.
#define l0(x, y) V4(O(t4, float2(x, y)))
#define l1(x, y) V4(O(t5, float2(x, y)))
#define l2(x, y) V4(O(t6, float2(x, y)))
#define l3(x, y) V4(O(t7, float2(x, y)))

// Pass 5, function f0: reduces 72 four-component inputs to a single V4.
//
// Each argument is multiplied by its own constant 4x4 matrix with
// mul(vector, matrix) (the vector acts as a row vector), the 72 products are
// accumulated into r in argument order, and a constant V4 offset is added
// last. The result is returned unclamped.
//
// Parameters: sN_k for N = 0..7 and k = 0..8, 72 vectors in total.
// NOTE(review): the call site is outside this excerpt. By analogy with the
// Pass4 call earlier in the file, k is presumably the 3x3 tap index
// (row-major, 4 = centre), even N the non-negative part and odd N the
// non-positive part of the l0..l3 samples - confirm against Pass5.
//
// NOTE(review): the constants appear to be generated network weights. Keep
// the statement order as is: the sum is done in reduced-precision floating
// point, so reordering the additions can change the rounding of the result.
V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) {
	V4 r = 0.0;
	// Contributions of s0_0..s0_8.
	r += mul(s0_0, M4(6.038e-02, -1.027e-01, 5.822e-02, -6.061e-02, -7.843e-02, 5.764e-02, -4.207e-02, -6.612e-02, 5.435e-02, -1.535e-02, -2.172e-02, 6.898e-02, -6.557e-02, 8.073e-02, 1.011e-02, -4.904e-02));
	r += mul(s0_1, M4(8.764e-04, 1.018e-01, 1.772e-01, 1.661e-01, 6.140e-02, -2.055e-02, -1.707e-01, -7.450e-02, 7.612e-02, -1.036e-01, 1.102e-01, -1.187e-01, -1.174e-02, -3.453e-02, -9.480e-02, -9.441e-02));
	r += mul(s0_2, M4(-5.569e-02, -1.362e-01, 1.829e-01, 1.028e-01, 2.850e-02, -2.040e-03, 5.163e-02, 3.880e-02, 5.329e-02, 5.051e-03, -2.657e-02, -4.745e-02, -5.201e-02, -1.034e-02, 3.430e-02, -7.121e-02));
	r += mul(s0_3, M4(-1.999e-01, 2.646e-01, -1.668e-02, -2.097e-02, -3.757e-02, 2.681e-03, 9.334e-02, -7.001e-03, 1.230e-01, -3.327e-02, 3.168e-03, 2.902e-02, 6.001e-02, 9.742e-02, -1.138e-04, 8.002e-02));
	r += mul(s0_4, M4(4.957e-02, 2.881e-01, -2.932e-01, 9.982e-02, -5.757e-02, -1.387e-01, 1.029e-01, -1.149e-01, -9.208e-02, 2.486e-01, 3.498e-02, 4.900e-02, 4.180e-02, 8.201e-03, 1.992e-01, -1.461e-02));
	r += mul(s0_5, M4(5.227e-02, -3.034e-02, -2.078e-02, 3.595e-02, -8.179e-02, 2.010e-02, 1.053e-01, 1.640e-02, -1.606e-01, 3.057e-02, -4.679e-02, 9.908e-02, 4.368e-02, -1.294e-02, -4.690e-02, 8.246e-02));
	r += mul(s0_6, M4(2.268e-02, 1.556e-01, -1.868e-02, 3.933e-02, 3.993e-02, -8.431e-02, -6.621e-02, 4.407e-02, -2.419e-02, 8.918e-02, -4.738e-02, -7.020e-02, 4.749e-02, -1.842e-02, -5.654e-02, -8.227e-03));
	r += mul(s0_7, M4(-5.760e-04, 1.471e-01, -1.820e-01, 6.890e-02, 1.373e-01, 9.203e-03, -2.028e-01, 9.876e-02, 1.083e-01, 5.770e-02, -3.253e-02, 2.297e-02, 1.292e-02, 1.122e-01, -1.429e-01, -8.835e-02));
	r += mul(s0_8, M4(-5.292e-02, 1.342e-01, -2.273e-03, 2.506e-01, 1.349e-02, 3.801e-03, -1.132e-01, -8.694e-02, 1.308e-01, 1.180e-02, -1.627e-01, 1.411e-01, 3.735e-02, -1.071e-01, 1.332e-02, -7.277e-02));
	// Contributions of s1_0..s1_8.
	r += mul(s1_0, M4(2.520e-01, 1.005e-02, -6.599e-02, -1.435e-02, -8.251e-02, 9.219e-02, 2.099e-01, 4.397e-02, 7.254e-02, 5.108e-02, -6.084e-02, 2.280e-01, -7.666e-02, 2.139e-01, -1.559e-01, 3.090e-02));
	r += mul(s1_1, M4(-1.438e-01, 1.429e-01, -1.312e-01, 2.296e-02, -1.380e-01, -1.234e-02, -9.997e-02, 1.424e-01, -1.930e-01, -2.154e-01, -5.713e-03, -1.436e-01, -1.227e-01, -6.206e-02, 1.316e-01, 1.183e-01));
	r += mul(s1_2, M4(-3.133e-02, -7.789e-03, 9.705e-02, -2.099e-01, 2.683e-01, 8.855e-02, -1.552e-01, -2.798e-01, -4.940e-02, 7.202e-02, 4.606e-02, -1.568e-01, -1.705e-01, -2.370e-02, 6.812e-02, 5.503e-02));
	r += mul(s1_3, M4(-2.010e-01, 7.593e-02, 1.140e-01, -1.111e-01, 3.125e-01, 1.057e-03, -2.343e-01, 2.188e-01, 1.613e-01, -5.725e-02, 2.443e-01, 1.606e-01, -1.111e-01, -3.890e-02, -1.082e-01, 2.268e-01));
	r += mul(s1_4, M4(-1.969e-02, 1.231e-01, -1.682e-01, -2.341e-01, -2.476e-01, -2.521e-02, 1.433e-01, 2.585e-01, -5.905e-01, -6.658e-03, 3.786e-01, 1.818e-02, -3.195e-01, -4.331e-02, 2.499e-01, 3.572e-01));
	r += mul(s1_5, M4(-2.933e-02, 6.255e-02, 5.270e-02, -6.322e-02, -6.288e-01, 3.056e-02, 2.344e-01, -5.509e-01, 4.417e-02, -1.143e-01, 1.720e-01, -3.562e-01, -1.757e-02, -7.980e-02, -5.221e-02, 1.142e-01));
	r += mul(s1_6, M4(4.704e-02, -7.956e-03, -4.388e-02, 8.044e-02, 1.961e-01, 3.794e-02, -3.145e-01, 4.592e-02, 7.526e-02, 5.595e-02, -8.909e-02, -8.451e-02, -3.933e-02, 7.445e-02, -4.443e-01, -4.812e-02));
	r += mul(s1_7, M4(-6.361e-03, -2.334e-02, 6.683e-02, 5.801e-02, 4.036e-01, 3.348e-01, -5.469e-01, -4.894e-01, 7.236e-02, -1.876e-02, 3.989e-02, -1.081e-01, 2.781e-02, -4.224e-02, 7.930e-02, 8.278e-02));
	r += mul(s1_8, M4(7.127e-03, -7.018e-02, -1.531e-02, -2.631e-02, 1.443e-01, -1.601e-01, -2.977e-01, -3.354e-01, 2.859e-02, 3.597e-02, -6.660e-02, -9.328e-02, -2.760e-03, -1.686e-01, 4.671e-03, -5.109e-02));
	// Contributions of s2_0..s2_8.
	r += mul(s2_0, M4(1.738e-01, 2.499e-01, 1.767e-01, -2.066e-01, -2.718e-02, 5.185e-02, 1.130e-01, 4.579e-02, -5.349e-02, -1.729e-02, 9.773e-03, 7.695e-02, -4.065e-02, -3.152e-02, -5.521e-02, -4.779e-02));
	r += mul(s2_1, M4(1.050e-01, -2.569e-02, 1.166e-01, -4.488e-02, 5.872e-03, -1.715e-01, -4.873e-02, -8.728e-02, -5.655e-02, -5.162e-02, -2.014e-02, 5.109e-02, -2.352e-01, -6.745e-02, -4.318e-02, -1.529e-01));
	r += mul(s2_2, M4(-5.747e-02, -1.001e-04, -1.659e-01, -1.151e-02, 1.184e-01, 1.088e-02, 9.885e-02, -4.186e-02, 2.511e-03, -4.830e-02, -9.706e-02, -2.413e-02, -1.844e-02, 3.858e-02, -7.717e-03, -6.948e-02));
	r += mul(s2_3, M4(7.010e-02, -1.670e-02, 2.139e-01, 7.957e-02, -1.501e-01, 1.199e-02, 9.177e-02, -1.012e-01, 8.116e-03, -3.847e-02, -8.662e-02, 3.384e-02, 2.598e-02, 1.091e-01, 1.689e-02, -1.996e-02));
	r += mul(s2_4, M4(5.851e-01, -4.156e-02, -2.361e-02, -1.495e-01, -4.768e-03, 2.125e-02, -5.071e-02, -2.313e-02, 1.628e-01, -1.056e-01, 4.753e-02, -5.418e-02, -1.305e-01, -1.263e-01, 3.208e-02, -7.109e-03));
	r += mul(s2_5, M4(2.752e-01, -4.555e-04, -3.288e-01, 9.494e-02, 6.333e-02, 2.145e-01, -5.950e-02, -4.529e-02, -1.564e-01, 4.102e-02, 1.456e-01, 1.577e-01, -2.017e-01, -1.197e-02, -1.137e-01, -4.620e-02));
	r += mul(s2_6, M4(3.852e-01, -3.149e-02, -2.738e-02, -1.856e-02, -2.038e-02, -1.391e-02, 1.545e-01, 4.244e-02, -7.351e-02, -1.266e-01, 6.077e-02, 2.922e-02, -1.241e-01, 5.051e-02, -1.331e-01, 1.044e-01));
	r += mul(s2_7, M4(-1.448e-01, 1.017e-01, 1.255e-01, -1.116e-01, 6.711e-02, -4.391e-02, 4.815e-03, 1.760e-01, -8.878e-02, 5.888e-03, -1.435e-01, -1.693e-01, -2.145e-01, -6.326e-02, 1.285e-02, -7.705e-02));
	r += mul(s2_8, M4(-9.547e-02, -1.173e-01, 1.793e-01, -7.501e-02, 7.400e-02, -6.034e-03, 3.186e-02, 1.514e-01, 2.215e-02, -4.265e-02, 6.466e-03, 2.240e-02, -1.609e-01, 7.133e-02, -3.133e-02, -7.962e-03));
	// Contributions of s3_0..s3_8.
	r += mul(s3_0, M4(1.172e-02, 2.080e-01, 1.168e-01, -5.939e-02, -6.782e-02, -4.500e-02, 9.755e-04, 3.416e-02, 2.640e-01, 9.256e-02, 3.313e-02, 6.642e-02, -5.530e-02, -4.369e-02, 9.257e-02, -6.417e-02));
	r += mul(s3_1, M4(7.388e-02, -9.949e-02, -7.112e-02, -6.696e-02, -1.935e-02, -5.532e-02, -3.353e-02, 8.160e-02, 9.737e-02, -5.728e-02, -2.573e-02, 2.480e-01, -2.191e-01, 1.059e-01, 2.171e-01, 2.034e-02));
	r += mul(s3_2, M4(7.582e-02, 1.122e-01, -1.356e-03, -2.247e-01, 8.112e-02, 5.689e-02, -1.266e-01, -1.438e-01, 1.457e-01, 3.666e-02, -3.664e-02, -9.318e-02, 3.935e-02, 1.949e-02, -7.842e-02, -3.551e-02));
	r += mul(s3_3, M4(-1.352e-01, -2.188e-01, -8.137e-02, 1.343e-01, -9.011e-02, 1.735e-01, -2.651e-01, -4.401e-02, -1.104e-01, -1.712e-02, -4.738e-02, 9.005e-02, 2.003e-01, 7.993e-02, 2.341e-02, -5.732e-02));
	r += mul(s3_4, M4(-3.021e-01, -1.628e-01, -9.939e-02, -5.101e-02, 1.852e-01, 4.853e-02, -2.408e-01, 1.685e-01, -1.590e-01, -1.416e-01, 2.550e-01, 8.751e-02, -1.338e-01, -7.205e-02, 7.076e-02, 1.652e-01));
	r += mul(s3_5, M4(1.865e-02, -1.665e-02, 2.272e-02, 3.661e-02, -1.933e-01, 2.391e-01, -4.422e-01, -2.826e-02, -9.839e-02, -3.878e-03, -1.376e-01, 8.164e-03, 2.325e-02, 5.991e-02, -1.257e-01, -1.250e-01));
	r += mul(s3_6, M4(2.248e-02, -1.574e-01, 1.337e-01, 2.721e-02, -7.530e-02, 6.672e-02, -1.552e-01, -3.174e-02, -6.926e-03, -8.575e-02, 6.217e-02, -7.909e-02, 3.127e-02, 2.158e-01, -1.689e-01, -8.679e-02));
	r += mul(s3_7, M4(-1.810e-02, 1.414e-01, -1.642e-01, 1.957e-01, 9.396e-02, 9.682e-02, 6.155e-02, -5.372e-02, 3.441e-03, 1.170e-01, -4.898e-04, 1.152e-01, 1.871e-01, -2.607e-01, -9.338e-02, 1.715e-01));
	r += mul(s3_8, M4(-3.406e-02, -5.075e-02, 7.496e-02, -1.226e-01, -1.136e-02, 6.224e-02, -2.319e-01, 2.711e-02, 1.313e-01, 8.848e-02, -3.355e-01, -2.899e-01, 3.552e-02, 4.240e-02, -1.280e-01, -1.896e-01));
	// Contributions of s4_0..s4_8.
	r += mul(s4_0, M4(2.978e-03, 4.338e-02, 2.667e-01, -3.428e-02, 4.550e-02, -5.447e-02, -3.128e-01, -5.822e-02, 7.073e-02, 9.923e-02, -1.416e-01, -8.767e-02, -6.636e-02, 1.224e-01, -4.772e-02, 2.206e-02));
	r += mul(s4_1, M4(5.822e-02, 4.371e-02, 5.661e-02, 9.083e-04, -1.675e-01, 4.282e-02, 9.959e-02, 2.148e-02, 6.614e-02, -6.577e-02, 9.507e-02, 1.344e-01, -2.506e-02, -6.194e-02, 5.761e-02, -1.130e-01));
	r += mul(s4_2, M4(-6.944e-02, -5.566e-02, 1.012e-01, -4.955e-02, -4.260e-02, 4.159e-02, -1.214e-01, -1.117e-01, -6.724e-03, -8.405e-02, -1.590e-01, 6.350e-02, 5.316e-02, -2.193e-03, -6.465e-02, 6.182e-02));
	r += mul(s4_3, M4(-7.619e-02, -6.501e-02, 2.396e-01, 2.766e-02, -9.113e-02, -2.040e-01, 1.268e-01, -5.021e-02, -5.359e-02, -3.492e-03, 9.936e-02, 1.327e-01, -1.176e-02, 1.324e-01, -6.895e-02, -1.033e-01));
	r += mul(s4_4, M4(-9.050e-02, -1.741e-01, 1.353e-01, -2.499e-02, 1.499e-01, 6.837e-02, 1.544e-01, -2.328e-01, -1.001e-01, -3.200e-01, 2.555e-02, 3.163e-02, 5.167e-03, 6.599e-02, 5.332e-02, -2.383e-02));
	r += mul(s4_5, M4(-1.380e-01, -6.767e-02, 7.205e-02, 7.794e-02, 3.260e-02, 8.421e-03, 9.155e-02, 2.219e-03, -2.114e-01, -5.606e-02, 9.497e-02, 1.809e-01, 3.274e-02, -3.988e-03, -9.138e-02, -8.120e-02));
	r += mul(s4_6, M4(-2.992e-02, -2.485e-03, 2.118e-02, 1.017e-02, 9.404e-02, -4.484e-02, -2.044e-01, 3.509e-02, -8.506e-02, -7.819e-02, -2.538e-02, -2.135e-02, 1.035e-01, 8.211e-02, -6.053e-02, 2.719e-02));
	r += mul(s4_7, M4(-1.954e-02, -1.303e-01, 2.458e-02, 1.259e-01, 6.521e-02, 7.827e-02, -2.962e-02, -1.487e-01, 1.335e-01, -1.090e-01, 5.524e-02, 1.814e-01, -1.546e-01, 1.165e-01, 4.443e-04, -1.573e-01));
	r += mul(s4_8, M4(-3.494e-03, 4.970e-02, 4.422e-02, 8.782e-02, -5.605e-02, 2.573e-01, -2.386e-01, -1.946e-01, 2.648e-02, -1.001e-01, -5.948e-02, 1.535e-01, 3.174e-02, 8.339e-02, -5.591e-02, 1.059e-02));
	// Contributions of s5_0..s5_8.
	r += mul(s5_0, M4(3.668e-02, 5.965e-02, -1.721e-02, -3.068e-02, 1.906e-02, -1.586e-01, -1.670e-01, 1.607e-01, -1.702e-01, -1.249e-01, -2.854e-01, 2.574e-02, -1.970e-01, -1.641e-02, -8.082e-02, -5.110e-02));
	r += mul(s5_1, M4(-1.574e-01, 1.272e-01, 1.492e-02, -7.330e-03, -2.324e-01, -6.950e-02, 1.762e-02, 2.972e-02, -3.110e-01, -6.619e-03, -3.372e-02, -9.828e-02, -1.360e-02, -9.453e-02, 9.109e-02, -6.697e-02));
	r += mul(s5_2, M4(3.255e-02, 1.122e-01, 3.934e-02, -1.884e-01, -2.090e-01, -6.767e-02, 6.147e-02, 1.271e-01, -9.991e-02, -8.133e-02, -9.660e-02, -4.156e-02, 2.997e-02, -9.234e-02, -6.113e-02, -1.478e-01));
	r += mul(s5_3, M4(-5.160e-02, -1.590e-02, -1.875e-01, -4.392e-02, -5.295e-02, -1.335e-01, -4.673e-02, 5.958e-02, -6.313e-02, -5.531e-02, 1.475e-01, 1.547e-02, 1.027e-01, -6.958e-02, 2.418e-02, 2.403e-02));
	r += mul(s5_4, M4(9.424e-02, -8.101e-02, -2.912e-01, -1.108e-01, 1.939e-01, 3.821e-02, -5.703e-02, 1.513e-02, 9.871e-02, 4.350e-02, 2.875e-02, -1.627e-01, -2.624e-02, -1.394e-01, 1.132e-01, -1.424e-02));
	r += mul(s5_5, M4(-6.870e-02, -6.601e-02, -1.372e-01, -1.179e-01, 1.986e-01, 5.603e-02, 1.706e-01, -9.338e-03, 1.029e-01, -5.792e-02, 6.861e-02, -7.211e-02, -4.139e-02, -2.509e-01, -1.461e-01, -1.405e-01));
	r += mul(s5_6, M4(7.451e-02, 2.164e-01, -2.009e-01, 2.788e-02, 1.175e-01, -9.648e-02, -1.430e-02, -7.214e-02, 9.366e-02, -6.314e-03, -1.510e-01, -6.284e-03, -1.500e-01, 2.851e-02, 5.196e-02, 1.170e-01));
	r += mul(s5_7, M4(-3.499e-04, 8.393e-02, -1.515e-01, 5.736e-02, 3.603e-02, 1.352e-02, 9.950e-02, -2.798e-02, 2.069e-01, 7.262e-02, 4.129e-02, -1.399e-01, -1.792e-01, -3.952e-01, 1.173e-01, -4.999e-02));
	r += mul(s5_8, M4(-4.190e-02, 1.134e-01, -2.901e-01, -8.789e-02, -1.495e-01, 2.644e-01, -9.540e-02, -3.225e-01, -7.771e-02, 1.823e-01, -2.192e-01, -1.170e-01, -4.472e-03, -1.040e-01, 1.290e-01, 1.096e-02));
	// Contributions of s6_0..s6_8.
	r += mul(s6_0, M4(-1.807e-01, -3.467e-02, -1.978e-01, 4.916e-03, 6.145e-02, 1.576e-02, 1.923e-02, -1.277e-02, 2.114e-02, -3.700e-02, -3.673e-02, -9.642e-02, 1.059e-01, 6.235e-02, 6.436e-02, 2.042e-02));
	r += mul(s6_1, M4(-9.068e-02, 5.704e-02, -8.745e-02, -6.303e-02, 2.124e-01, -1.943e-01, -1.370e-01, -1.624e-01, -1.096e-01, 4.166e-02, 6.197e-02, -1.716e-02, 1.131e-01, 4.626e-02, 8.055e-02, 6.132e-02));
	r += mul(s6_2, M4(7.138e-02, -5.989e-02, 1.507e-01, 5.818e-02, 1.495e-01, -2.149e-02, -7.632e-02, -1.873e-01, 7.602e-02, 1.672e-02, 1.305e-01, 5.485e-02, -1.106e-01, 9.418e-02, 2.097e-01, -1.107e-01));
	r += mul(s6_3, M4(-7.939e-02, -9.415e-02, -3.253e-01, 5.550e-02, -5.333e-02, -6.900e-02, 2.532e-01, -1.236e-02, 6.154e-02, 2.696e-03, -1.090e-01, 9.496e-02, -5.117e-02, 2.633e-01, 9.791e-02, -5.712e-02));
	r += mul(s6_4, M4(-1.092e-02, 5.562e-02, 2.001e-02, -8.640e-02, 2.022e-01, 1.713e-01, 7.558e-02, -2.621e-02, -2.034e-01, -7.972e-02, -9.542e-02, -7.372e-02, -2.119e-02, -9.791e-02, -4.815e-02, 1.717e-01));
	r += mul(s6_5, M4(-6.813e-02, -1.132e-01, 9.465e-02, -2.221e-02, 8.347e-02, -2.863e-02, 1.618e-02, 1.241e-01, -1.080e-01, -4.588e-02, -1.048e-02, 8.343e-02, 2.000e-02, -7.991e-03, 3.295e-01, -7.222e-02));
	r += mul(s6_6, M4(-1.257e-01, 2.813e-02, -5.140e-02, -1.422e-01, 1.255e-01, 1.617e-01, -9.818e-02, 9.477e-03, -1.898e-03, 8.431e-02, 1.631e-01, -2.976e-02, 2.819e-01, 6.733e-02, 4.286e-02, 8.241e-02));
	r += mul(s6_7, M4(-1.021e-01, -1.888e-01, -8.428e-02, 1.146e-01, 1.510e-01, -5.692e-02, -1.291e-01, 5.237e-02, 2.558e-02, -3.653e-02, 1.264e-01, 3.667e-02, -9.369e-02, 1.182e-01, -2.564e-01, -4.997e-02));
	r += mul(s6_8, M4(4.274e-02, 9.755e-02, -9.820e-02, -5.409e-03, 9.648e-03, -3.442e-02, -1.963e-01, -9.613e-02, 8.596e-02, -2.486e-02, -7.731e-02, 1.119e-01, 3.031e-03, 1.189e-02, 7.198e-02, -2.010e-02));
	// Contributions of s7_0..s7_8.
	r += mul(s7_0, M4(-8.593e-02, 4.410e-02, 5.916e-02, 5.405e-02, 1.801e-02, 3.804e-02, 8.731e-02, 4.344e-02, 2.200e-01, -2.394e-02, 1.496e-01, 6.278e-02, -7.431e-02, 1.860e-01, 7.845e-02, -9.594e-02));
	r += mul(s7_1, M4(-1.069e-01, -2.582e-02, 5.910e-02, -6.761e-02, -1.506e-01, -8.254e-02, 8.958e-02, -4.935e-02, -1.770e-01, -2.038e-01, 7.928e-02, 3.037e-01, -1.013e-01, 2.359e-02, -1.057e-01, -5.003e-02));
	r += mul(s7_2, M4(6.604e-02, -8.159e-02, 9.681e-02, -9.276e-03, 4.673e-02, -1.199e-02, -2.188e-01, 4.027e-02, 1.614e-01, 4.145e-02, 2.027e-01, 4.471e-02, -5.706e-02, 8.516e-03, -2.315e-02, -5.171e-02));
	r += mul(s7_3, M4(5.693e-02, -2.617e-03, 9.846e-02, 1.033e-01, -1.299e-01, -6.217e-03, 1.042e-01, -1.079e-01, -2.066e-01, -2.701e-01, 3.056e-01, 9.355e-02, 7.681e-02, 1.426e-01, -1.712e-01, 9.973e-02));
	r += mul(s7_4, M4(1.308e-01, 5.535e-02, 1.887e-01, -7.625e-02, 4.354e-02, 4.574e-02, -7.604e-03, -1.578e-02, -1.550e-01, 1.512e-01, -3.109e-01, -3.903e-01, -1.085e-01, -1.300e-01, 8.819e-02, 3.659e-02));
	r += mul(s7_5, M4(-5.433e-02, 1.856e-04, 1.124e-01, -1.339e-01, -1.416e-01, -1.226e-02, -7.421e-02, -3.983e-02, -1.132e-01, 1.275e-01, -4.816e-02, -1.477e-01, 1.074e-01, 7.671e-02, 1.985e-01, 1.291e-01));
	r += mul(s7_6, M4(-3.036e-02, -1.725e-02, -2.685e-02, -2.915e-03, 3.075e-02, 1.702e-01, -9.196e-02, -1.142e-01, 6.406e-02, 7.094e-03, -1.987e-02, 9.400e-02, 6.958e-02, 4.125e-02, -2.776e-02, 2.524e-02));
	r += mul(s7_7, M4(8.227e-02, -1.415e-01, 3.321e-02, 1.146e-03, -5.870e-02, -4.962e-02, 4.423e-02, -8.562e-03, -1.255e-01, -2.399e-02, -9.390e-02, 2.133e-01, 9.043e-02, -1.159e-02, -1.988e-01, 5.948e-03));
	r += mul(s7_8, M4(3.438e-02, 1.674e-01, -9.081e-02, 1.104e-01, -8.589e-02, -1.508e-01, -6.714e-03, -1.057e-02, -3.694e-02, 9.586e-02, -8.628e-03, 1.749e-01, 1.191e-02, -7.848e-02, -1.482e-01, -2.125e-01));
	// Constant per-component offset, added after all 72 products.
	r += V4(2.042e-02, -1.567e-02, 1.777e-02, 8.206e-03);
	return r;
}

// f1: folds 72 four-component inputs into a single four-component result.
//
// Parameters are named sN_K with N in 0..7 and K in 0..8. Each parameter is
// passed as the left-hand (vector) operand of mul() against its own constant
// 4x4 matrix; the 72 products are added to r in parameter order, then one
// constant 4-component offset is added and r is returned.
//
// V4 / M4 are the min16float4 / min16float4x4 aliases from the COMMON section,
// so the running sum is held in a minimum-precision type; reordering the terms
// may change rounding.
//
// NOTE(review): presumably N selects one of eight 4-channel feature groups and
// K one of nine spatial taps (a 3x3 neighbourhood) - confirm at the call site.
// NOTE(review): the constants look machine-generated (trained weights);
// presumably they should be regenerated rather than edited by hand.
V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) {
	// Accumulator; every component starts at zero.
	V4 r = 0.0;
	// Terms for s0_0 .. s0_8.
	r += mul(s0_0, M4(-1.871e-01, -2.821e-02, -2.131e-01, -2.105e-02, 1.113e-02, -8.090e-02, -6.108e-02, -1.710e-02, -2.215e-02, -1.474e-03, 1.704e-02, 3.001e-02, -5.379e-02, 4.545e-02, -1.134e-01, -2.035e-01));
	r += mul(s0_1, M4(-9.495e-04, -1.423e-01, -2.280e-02, -2.646e-02, -4.118e-02, 1.052e-01, -6.480e-02, -3.867e-02, -1.676e-02, -2.095e-02, 7.072e-02, 1.037e-01, 1.288e-01, -1.114e-02, 1.837e-02, -2.369e-02));
	r += mul(s0_2, M4(-1.243e-02, -1.434e-01, 8.768e-03, 2.108e-01, 1.194e-01, 5.633e-02, -1.265e-02, -9.167e-02, -2.684e-02, -1.275e-01, -3.768e-02, -5.514e-02, -4.753e-02, 3.975e-02, 1.264e-01, 3.772e-02));
	r += mul(s0_3, M4(-5.436e-02, 1.387e-01, -4.905e-02, 1.009e-02, 2.186e-04, -9.563e-02, -7.090e-02, -1.014e-01, 2.932e-03, 1.358e-01, -2.002e-02, 3.347e-02, 7.167e-02, 1.143e-01, 1.485e-01, -9.345e-02));
	r += mul(s0_4, M4(-1.239e-01, -6.966e-02, 1.447e-01, -1.225e-01, -6.644e-03, -6.150e-02, -1.980e-01, 2.807e-02, -3.973e-03, 1.241e-02, 1.473e-01, -2.119e-01, -5.985e-02, -1.174e-02, -1.191e-01, -1.458e-01));
	r += mul(s0_5, M4(-1.331e-01, -8.315e-02, -1.300e-01, -1.763e-01, -6.882e-02, -4.095e-03, -3.071e-02, -1.962e-01, -1.584e-03, -1.034e-01, -4.404e-02, -9.630e-02, -1.096e-02, -3.780e-02, -3.443e-02, 5.438e-02));
	r += mul(s0_6, M4(-7.963e-02, -3.774e-02, 2.984e-02, 7.809e-02, -5.955e-02, -3.516e-02, -2.589e-02, -6.405e-02, 1.241e-02, -4.356e-02, 2.731e-02, -4.592e-02, -1.681e-02, -2.013e-02, 1.026e-04, 6.043e-02));
	r += mul(s0_7, M4(6.424e-02, -1.309e-01, -1.238e-01, -4.344e-02, 1.308e-03, -1.141e-01, -5.711e-02, -1.666e-01, -3.539e-03, -1.410e-02, -6.655e-02, -2.380e-02, 1.941e-02, -7.815e-02, 1.136e-01, -3.781e-02));
	r += mul(s0_8, M4(-2.276e-02, -2.375e-01, -2.214e-02, 5.372e-03, 6.425e-03, -7.252e-02, 7.044e-02, -8.419e-03, -7.296e-02, -4.230e-02, 3.871e-03, 3.827e-02, -3.834e-02, 7.722e-02, 2.097e-03, -8.225e-02));
	// Terms for s1_0 .. s1_8.
	r += mul(s1_0, M4(4.056e-02, -3.233e-02, -6.508e-02, -6.274e-02, -1.572e-01, -1.340e-01, 6.801e-02, 8.273e-02, 8.778e-02, 8.204e-02, 1.450e-01, 1.294e-01, 6.334e-02, -2.436e-01, -2.443e-01, 1.411e-01));
	r += mul(s1_1, M4(1.442e-01, 1.466e-03, -9.557e-02, -5.947e-02, 1.829e-01, 2.630e-01, -6.553e-02, 1.282e-02, 2.104e-01, 7.472e-03, 2.137e-01, -1.518e-02, 1.268e-01, -3.356e-01, -5.628e-02, -1.184e-01));
	r += mul(s1_2, M4(1.325e-01, -6.010e-02, -8.943e-03, 1.608e-02, -4.346e-02, 2.257e-02, 2.482e-02, -1.306e-01, 1.403e-01, 1.675e-01, -3.712e-02, -3.032e-02, -1.534e-01, -2.324e-01, 9.234e-02, 6.583e-02));
	r += mul(s1_3, M4(7.828e-02, -4.908e-02, -3.408e-02, 4.230e-02, 2.449e-02, 3.121e-01, -8.996e-02, -8.823e-02, -1.253e-01, 1.874e-01, 1.196e-01, 1.294e-01, -1.330e-01, 2.173e-01, 6.471e-02, 2.821e-02));
	r += mul(s1_4, M4(2.224e-02, -2.149e-02, 2.666e-01, -9.329e-02, 2.452e-01, -5.926e-01, -1.972e-01, 1.340e-02, 7.994e-03, -1.694e-01, 5.613e-01, -3.168e-01, -1.292e-01, 1.472e-02, -1.207e-01, -1.759e-01));
	r += mul(s1_5, M4(-2.011e-02, 4.120e-02, 8.215e-02, -2.399e-01, -2.318e-01, -8.668e-02, 1.445e-01, 1.350e-01, -1.501e-01, -1.454e-02, -8.517e-02, -1.553e-01, 3.227e-02, 1.248e-02, 5.942e-02, 1.594e-01));
	r += mul(s1_6, M4(7.560e-03, 4.543e-02, -6.540e-02, 4.231e-02, 2.725e-02, 1.371e-02, 2.006e-01, 1.437e-01, -4.834e-02, -1.133e-02, 8.049e-02, -2.226e-02, -8.326e-02, -8.656e-02, -7.931e-02, 4.502e-02));
	r += mul(s1_7, M4(-6.518e-02, 8.934e-02, -1.219e-01, -4.372e-02, 4.623e-02, -3.027e-01, -1.326e-01, -1.512e-01, -3.234e-02, -6.440e-02, 5.738e-02, -4.798e-03, 6.094e-02, 2.635e-02, 1.314e-01, -7.782e-02));
	r += mul(s1_8, M4(6.408e-02, 4.318e-02, -1.060e-02, -6.661e-02, -2.892e-01, -5.121e-01, 2.127e-01, -2.559e-01, -1.175e-01, -1.270e-01, 8.104e-03, -6.716e-02, -5.801e-02, 1.361e-01, -2.497e-02, -3.579e-02));
	// Terms for s2_0 .. s2_8.
	r += mul(s2_0, M4(-2.161e-01, 1.363e-01, 8.227e-02, -6.284e-02, -5.903e-02, 5.772e-03, 2.087e-02, -1.065e-01, -1.529e-02, 1.757e-01, 6.662e-02, -2.364e-02, -6.565e-02, 7.983e-02, -7.751e-02, -7.226e-02));
	r += mul(s2_1, M4(-2.017e-02, 4.055e-02, 9.606e-02, -2.980e-02, -3.927e-02, 1.147e-01, 2.286e-02, 2.563e-01, 3.504e-02, 1.352e-01, 5.012e-02, -3.308e-04, -1.470e-01, -3.391e-02, 1.755e-01, 9.478e-02));
	r += mul(s2_2, M4(-1.474e-01, -1.028e-01, 7.540e-02, -1.023e-01, -7.313e-02, 1.066e-01, 5.016e-02, -1.897e-01, -5.914e-02, 7.808e-02, -4.346e-02, -2.776e-02, 2.904e-02, 8.417e-02, -4.232e-02, 1.674e-01));
	r += mul(s2_3, M4(2.335e-01, -2.842e-02, -5.739e-02, -1.221e-01, 2.011e-02, 3.406e-02, 1.656e-01, -1.083e-01, 3.551e-02, -9.249e-02, 5.859e-02, 3.906e-02, 7.709e-02, 1.910e-01, 2.195e-02, 7.340e-02));
	r += mul(s2_4, M4(7.809e-02, -1.515e-02, 1.817e-01, -7.771e-02, 9.634e-02, 1.301e-01, -3.744e-02, 1.008e-01, -4.234e-02, 2.875e-03, -9.835e-04, 3.331e-02, 2.310e-01, -2.396e-01, -2.066e-01, -7.600e-02));
	r += mul(s2_5, M4(2.134e-02, -4.246e-02, 7.435e-03, -9.941e-03, 2.696e-02, -1.199e-01, -6.648e-03, -6.885e-02, 6.191e-03, -4.447e-02, 1.650e-02, 9.745e-02, 1.988e-01, -1.938e-01, 2.823e-01, 1.705e-01));
	r += mul(s2_6, M4(1.660e-01, -2.599e-01, 4.331e-02, -1.877e-01, 6.713e-02, 1.880e-02, 7.058e-03, -4.739e-02, -1.476e-02, 2.851e-02, -2.477e-02, -2.204e-03, 1.588e-02, 7.779e-03, -1.553e-01, 3.717e-02));
	r += mul(s2_7, M4(1.023e-01, -1.597e-02, -6.167e-02, 6.849e-02, 4.146e-02, 7.833e-03, 4.950e-02, 1.834e-03, 2.423e-03, 1.970e-02, 2.839e-02, -5.577e-02, -1.964e-01, -1.136e-01, 7.158e-03, -4.833e-02));
	r += mul(s2_8, M4(-6.461e-02, 1.330e-01, -9.992e-03, 3.494e-02, 1.050e-02, 3.294e-02, -7.505e-02, 1.301e-03, 6.948e-02, -3.319e-03, 4.139e-02, -1.808e-02, 1.410e-01, -7.249e-02, -9.410e-02, -2.916e-03));
	// Terms for s3_0 .. s3_8.
	r += mul(s3_0, M4(9.794e-02, -8.956e-02, 9.463e-03, -3.120e-02, -1.004e-01, -1.610e-01, -4.092e-02, -2.509e-01, -9.779e-03, -2.849e-01, 9.762e-02, 3.390e-02, -7.858e-02, -2.126e-02, -8.321e-02, -4.915e-02));
	r += mul(s3_1, M4(1.035e-03, -5.190e-02, 9.745e-02, 2.534e-01, -1.810e-01, 2.817e-01, -1.096e-01, 2.089e-01, -9.686e-02, -2.929e-01, 2.868e-02, 1.402e-01, -8.799e-02, -2.324e-02, 7.078e-02, -2.115e-03));
	r += mul(s3_2, M4(9.160e-02, 2.894e-02, -8.441e-03, -1.818e-01, -1.529e-01, 9.104e-02, 6.442e-02, -2.131e-01, -3.002e-01, -1.751e-01, -1.274e-01, 7.683e-02, 2.336e-02, 6.137e-02, -2.225e-02, 9.249e-02));
	r += mul(s3_3, M4(-9.086e-02, -1.176e-01, -2.192e-01, 2.074e-02, 9.874e-02, 7.857e-02, 5.903e-02, -1.617e-01, -1.056e-01, -2.939e-01, 1.331e-02, -2.528e-02, -9.598e-02, -1.886e-02, 1.347e-02, -2.747e-02));
	r += mul(s3_4, M4(3.116e-02, -1.008e-01, 1.747e-01, 2.508e-02, 1.102e-01, 8.567e-02, -1.714e-01, -3.481e-02, -2.085e-01, -3.166e-02, -9.038e-02, 1.993e-01, 1.475e-01, -1.922e-01, -9.841e-02, -2.740e-02));
	r += mul(s3_5, M4(-2.964e-02, 3.304e-02, -5.346e-02, -3.595e-02, -1.856e-02, -2.969e-01, -1.847e-01, -4.361e-02, -1.784e-01, 9.786e-02, 1.694e-01, 1.664e-01, 7.496e-02, 2.784e-02, 1.145e-01, -2.746e-02));
	r += mul(s3_6, M4(-4.400e-02, 2.339e-02, 6.149e-02, 1.891e-03, 1.015e-01, -3.804e-03, 1.096e-01, -7.637e-02, -7.288e-02, -9.137e-02, -7.551e-02, 8.777e-03, -6.516e-02, 1.604e-02, 4.501e-02, -5.608e-02));
	r += mul(s3_7, M4(3.896e-03, -7.334e-02, -3.673e-02, 2.116e-03, 1.060e-01, 2.827e-03, 5.156e-02, -1.699e-01, -3.804e-02, -1.013e-02, -2.048e-01, 1.115e-01, -5.598e-02, 1.942e-01, -3.608e-02, -9.662e-03));
	r += mul(s3_8, M4(-1.038e-01, 4.401e-02, 1.331e-02, -1.917e-02, 9.200e-02, 1.028e-01, 3.215e-02, -4.704e-02, -1.374e-01, -1.129e-01, -8.069e-02, -6.331e-02, -2.008e-02, 1.892e-02, 7.121e-03, 1.436e-02));
	// Terms for s4_0 .. s4_8.
	r += mul(s4_0, M4(-7.756e-02, 2.062e-02, 4.399e-02, 1.107e-02, 6.661e-02, -5.046e-03, 1.393e-01, -6.560e-02, -1.426e-01, -1.762e-01, 8.760e-03, -1.763e-02, -5.790e-02, -5.544e-02, -7.877e-02, -5.698e-02));
	r += mul(s4_1, M4(-6.876e-02, -5.972e-02, 3.602e-02, 1.548e-01, -1.600e-01, 1.551e-01, -9.600e-02, -2.685e-01, -2.378e-02, -4.005e-02, -3.964e-02, 2.882e-01, -5.547e-02, 1.104e-01, 8.137e-02, 1.379e-01));
	r += mul(s4_2, M4(-9.239e-02, 7.299e-02, 4.045e-02, 5.511e-03, -1.197e-02, 2.754e-02, 2.256e-01, 1.320e-02, 2.351e-02, -1.223e-01, 5.086e-02, 7.153e-02, 5.949e-02, 2.832e-03, -5.051e-03, 1.528e-02));
	r += mul(s4_3, M4(-1.235e-01, 6.233e-02, -8.211e-02, -1.093e-01, -1.089e-01, 7.827e-03, 1.260e-01, 3.940e-02, -1.223e-01, 2.037e-02, -3.039e-03, -1.007e-02, -1.453e-01, 1.705e-01, 5.045e-02, -6.679e-02));
	r += mul(s4_4, M4(-1.850e-01, 1.124e-01, -2.744e-02, 4.633e-01, -1.888e-02, -1.497e-01, 2.819e-01, -9.535e-02, 4.777e-02, 9.229e-03, -1.721e-01, 1.564e-01, -2.240e-01, 8.776e-02, -1.616e-01, 2.031e-01));
	r += mul(s4_5, M4(2.790e-02, 6.465e-02, 1.183e-01, 6.803e-02, 2.085e-01, 8.519e-02, 5.526e-02, 1.918e-02, 9.109e-02, 1.322e-01, 1.707e-01, 9.906e-02, 6.375e-02, 1.046e-01, 4.839e-02, -7.507e-02));
	r += mul(s4_6, M4(-5.825e-02, 1.863e-02, -2.560e-02, -1.343e-01, -6.049e-02, -8.857e-02, 6.271e-02, -8.805e-02, 4.986e-02, 1.458e-01, 1.089e-01, -1.048e-02, -3.973e-02, -8.065e-02, 3.775e-02, -3.309e-02));
	r += mul(s4_7, M4(-9.327e-02, 4.840e-02, -1.265e-01, 1.491e-01, 7.817e-02, 4.162e-02, 1.440e-01, 1.603e-01, -5.780e-02, -1.189e-01, -7.263e-03, -1.101e-01, -1.226e-02, -3.063e-02, -3.353e-02, 2.052e-01));
	r += mul(s4_8, M4(1.751e-02, -1.135e-01, 1.571e-02, -2.205e-02, -3.998e-02, -1.429e-01, -2.788e-02, -7.556e-02, -7.037e-02, 1.664e-01, 9.417e-02, 4.116e-02, 1.210e-02, -6.077e-02, -3.109e-03, -9.682e-02));
	// Terms for s5_0 .. s5_8.
	r += mul(s5_0, M4(4.328e-02, -2.640e-03, 1.726e-02, -6.619e-02, -7.153e-02, -5.347e-03, 1.232e-01, 1.862e-03, 1.355e-04, -5.360e-02, -9.326e-03, -1.251e-02, 2.813e-02, 5.841e-02, -3.303e-02, -1.476e-01));
	r += mul(s5_1, M4(-8.970e-03, -9.200e-02, -1.182e-01, 8.614e-02, 1.028e-02, -3.642e-02, -4.635e-02, -1.499e-01, -1.331e-01, 1.114e-01, -1.823e-01, -1.963e-01, -1.437e-01, -6.405e-02, 1.130e-01, -1.615e-02));
	r += mul(s5_2, M4(3.005e-03, 1.274e-01, 2.344e-02, 1.668e-02, -5.464e-02, 5.698e-02, 4.789e-02, -9.728e-02, 1.634e-01, -2.751e-01, 7.409e-03, -6.518e-02, 4.952e-02, 1.257e-02, 4.623e-03, -9.371e-02));
	r += mul(s5_3, M4(-9.059e-02, -1.951e-02, -3.413e-02, -8.996e-02, 5.690e-02, 4.996e-02, -9.201e-02, 1.690e-01, 1.255e-01, 6.144e-02, 6.561e-02, 4.638e-02, -4.952e-02, 1.578e-01, 2.006e-02, -1.715e-01));
	r += mul(s5_4, M4(7.592e-02, 7.317e-02, -1.401e-01, 9.003e-04, 1.105e-01, 6.094e-02, 6.376e-02, -5.125e-02, 1.325e-01, 3.113e-01, -9.730e-02, -1.553e-01, -2.130e-01, 2.451e-01, 4.230e-02, 1.606e-01));
	r += mul(s5_5, M4(5.599e-02, -2.547e-02, 3.313e-02, -7.994e-02, -5.691e-02, 9.699e-02, -7.989e-02, -1.939e-01, -2.585e-02, 1.913e-01, -4.057e-02, -7.749e-02, 1.399e-01, 2.816e-01, 9.736e-02, -1.271e-02));
	r += mul(s5_6, M4(-3.046e-02, -5.232e-02, 6.563e-02, 9.402e-03, -9.218e-02, 8.404e-03, -3.375e-02, -9.036e-02, 4.413e-02, 3.919e-02, -2.495e-02, -1.239e-03, -3.065e-02, -2.916e-02, 4.007e-03, -2.579e-01));
	r += mul(s5_7, M4(-7.098e-03, -1.860e-03, -1.707e-01, 5.445e-03, 1.281e-01, 1.628e-01, 1.225e-01, 7.790e-02, -1.567e-03, 7.209e-02, 1.182e-01, -9.290e-02, -1.202e-01, 2.262e-01, -2.065e-02, 1.571e-01));
	r += mul(s5_8, M4(9.056e-02, -1.256e-02, 5.825e-02, -7.197e-02, -5.779e-02, -1.845e-01, -1.186e-01, -1.548e-01, -1.480e-01, 1.068e-01, -1.482e-01, -5.204e-02, -2.760e-02, -8.757e-02, 1.002e-01, -1.388e-02));
	// Terms for s6_0 .. s6_8.
	r += mul(s6_0, M4(7.393e-03, 1.422e-01, -4.407e-02, 2.375e-02, 7.300e-02, 7.536e-02, 4.936e-02, 8.964e-02, 3.491e-03, 1.315e-02, 4.104e-02, 7.586e-02, 5.862e-02, 9.637e-02, -8.634e-02, -1.815e-02));
	r += mul(s6_1, M4(5.447e-02, -4.146e-02, -1.955e-01, 3.767e-02, -8.846e-02, -2.408e-02, 7.166e-02, 9.727e-03, 8.169e-02, -2.903e-03, -8.014e-02, -2.011e-02, 7.493e-02, -1.732e-02, 1.244e-01, 5.136e-02));
	r += mul(s6_2, M4(8.524e-02, -2.271e-02, -4.364e-02, 3.690e-02, -9.154e-02, 6.250e-02, 4.295e-02, -2.384e-01, 8.008e-02, -7.720e-06, 1.023e-01, -9.540e-02, -2.173e-01, 5.006e-02, -5.404e-02, 1.215e-01));
	r += mul(s6_3, M4(5.341e-02, -1.380e-02, 4.311e-02, -6.630e-02, -3.448e-02, -6.397e-02, 2.076e-02, 1.184e-02, 2.474e-02, -1.257e-01, -7.047e-02, 6.348e-02, 1.586e-01, 1.115e-01, 3.230e-02, -1.733e-01));
	r += mul(s6_4, M4(4.991e-02, -1.141e-01, -1.378e-01, -6.951e-03, 1.873e-01, 4.493e-02, -4.137e-02, -4.430e-02, 1.579e-01, -3.852e-02, 1.640e-01, 9.429e-02, 1.355e-01, 1.617e-01, -5.072e-01, -6.408e-02));
	r += mul(s6_5, M4(6.064e-03, -6.852e-02, -1.470e-01, 8.329e-03, 7.775e-03, -9.763e-02, -5.572e-02, 1.070e-01, 9.256e-02, 1.398e-01, -9.383e-02, -1.502e-02, -6.022e-02, 1.013e-01, 1.429e-02, -1.039e-02));
	r += mul(s6_6, M4(-8.822e-02, 4.282e-03, -7.599e-02, 4.053e-02, -2.953e-02, 3.238e-02, 1.071e-01, 5.918e-03, -1.500e-02, 8.788e-03, -2.782e-02, 3.525e-02, 7.210e-02, -1.199e-01, 2.845e-02, -2.940e-01));
	r += mul(s6_7, M4(-8.471e-02, 1.662e-01, -6.165e-02, 3.814e-02, 7.629e-02, 2.466e-02, 7.141e-02, -1.585e-03, -1.659e-01, 4.623e-02, -9.034e-02, 3.881e-03, -2.486e-01, 1.006e-01, 3.395e-02, -1.782e-02));
	r += mul(s6_8, M4(-5.127e-03, -6.243e-03, -1.914e-01, 2.034e-02, 1.134e-01, -2.337e-01, 8.303e-02, 5.794e-02, 5.706e-02, 8.810e-02, -1.217e-02, -9.543e-02, -5.800e-02, 1.709e-02, -5.711e-02, -1.136e-02));
	// Terms for s7_0 .. s7_8.
	r += mul(s7_0, M4(1.130e-01, 8.798e-02, 2.158e-02, -8.030e-02, 1.381e-01, 1.055e-02, -6.276e-03, -3.859e-02, 3.457e-02, -1.523e-01, 1.370e-01, 4.508e-02, -7.988e-02, 5.912e-02, -5.479e-02, -6.642e-02));
	r += mul(s7_1, M4(1.081e-01, -1.007e-03, 1.322e-02, -8.172e-02, -1.023e-02, 9.431e-02, 8.142e-02, 2.190e-02, 3.612e-01, -2.514e-01, -1.085e-01, 1.408e-01, 7.889e-02, -1.442e-02, 1.788e-01, -7.563e-03));
	r += mul(s7_2, M4(7.983e-02, 4.121e-02, 1.014e-01, 5.905e-02, 3.933e-02, -2.331e-02, 3.376e-02, -1.003e-01, -1.513e-01, -1.039e-01, -9.056e-02, -3.042e-01, 3.447e-02, -1.100e-01, -9.911e-02, -7.386e-02));
	r += mul(s7_3, M4(7.042e-02, 7.651e-03, 6.423e-02, -4.223e-02, -9.795e-02, 1.053e-01, -6.054e-02, -9.443e-02, -2.108e-01, 7.588e-02, -1.837e-01, 4.932e-02, 6.889e-02, -4.395e-02, 2.615e-02, 9.409e-03));
	r += mul(s7_4, M4(5.884e-02, -7.014e-03, 1.347e-02, -6.053e-02, 1.090e-01, 1.379e-01, -2.118e-01, -1.471e-01, -1.746e-02, 2.390e-01, 2.742e-01, 1.095e-02, 1.466e-01, 8.033e-02, -1.313e-01, -1.958e-01));
	r += mul(s7_5, M4(7.272e-03, 1.305e-02, -1.297e-01, -9.173e-02, -5.465e-02, 8.300e-02, 3.174e-02, 1.459e-01, 5.363e-02, 2.652e-01, 3.240e-02, -1.516e-01, -1.920e-02, 8.674e-02, -2.512e-02, 8.875e-02));
	r += mul(s7_6, M4(-9.080e-02, -9.108e-02, -2.072e-02, -7.774e-02, -1.248e-01, 6.572e-02, 1.437e-01, -1.314e-01, 2.497e-02, 2.202e-01, 1.100e-01, 2.945e-02, -2.824e-02, -5.907e-02, -4.124e-02, 9.261e-02));
	r += mul(s7_7, M4(-6.926e-02, 2.453e-01, 1.731e-01, -4.591e-03, -1.366e-01, -1.971e-02, -1.156e-01, -1.253e-03, -1.535e-01, 2.887e-02, -1.003e-01, 8.991e-02, -3.469e-02, 1.163e-01, 1.835e-01, -3.614e-02));
	r += mul(s7_8, M4(1.541e-02, -6.370e-02, -7.276e-02, 9.494e-03, -4.712e-02, 1.327e-02, 4.455e-02, -9.613e-02, 1.129e-01, -2.214e-01, -7.553e-02, 5.877e-03, -7.342e-02, 4.476e-02, 5.164e-02, -2.974e-02));
	// Constant per-component offset added after all 72 products
	// (presumably the bias for these four outputs - confirm with the generator).
	r += V4(1.081e-02, 1.104e-03, 2.498e-03, 2.111e-03);
	return r;
}

// f2: folds 72 four-component inputs into a single four-component result.
//
// Parameters are named sN_K with N in 0..7 and K in 0..8. Each parameter is
// passed as the left-hand (vector) operand of mul() against its own constant
// 4x4 matrix; the 72 products are added to r in parameter order, then one
// constant 4-component offset is added and r is returned.
//
// V4 / M4 are the min16float4 / min16float4x4 aliases from the COMMON section,
// so the running sum is held in a minimum-precision type; reordering the terms
// may change rounding.
//
// NOTE(review): presumably N selects one of eight 4-channel feature groups and
// K one of nine spatial taps (a 3x3 neighbourhood) - confirm at the call site.
// NOTE(review): the constants look machine-generated (trained weights);
// presumably they should be regenerated rather than edited by hand.
V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) {
	// Accumulator; every component starts at zero.
	V4 r = 0.0;
	// Terms for s0_0 .. s0_8.
	r += mul(s0_0, M4(-9.523e-02, 7.526e-03, -2.074e-01, 4.818e-02, -5.945e-02, -3.677e-02, -1.007e-01, 2.116e-02, 1.844e-01, -2.631e-02, 4.416e-02, 1.048e-01, 4.360e-02, -7.908e-03, -4.849e-02, -6.085e-02));
	r += mul(s0_1, M4(1.516e-01, 5.656e-02, -8.160e-02, 1.651e-02, -1.158e-01, 3.780e-02, 1.881e-01, 2.673e-02, 1.120e-01, 3.675e-02, -8.565e-02, -3.982e-02, -7.177e-02, -9.332e-02, 8.668e-02, -1.560e-01));
	r += mul(s0_2, M4(1.006e-01, -7.895e-02, -1.341e-01, 5.189e-02, 4.895e-02, 4.167e-02, -1.296e-01, -6.457e-02, 2.716e-02, 7.833e-02, 8.289e-02, -1.103e-03, -9.103e-02, 1.081e-01, 2.987e-02, 6.692e-03));
	r += mul(s0_3, M4(3.345e-02, -6.734e-02, 2.309e-02, 5.627e-02, 8.582e-02, -2.582e-02, 4.288e-02, -4.792e-02, -5.124e-03, 1.531e-02, 2.494e-02, -3.836e-04, 1.338e-01, -4.618e-02, 8.606e-02, 8.540e-02));
	r += mul(s0_4, M4(-9.091e-02, -2.635e-01, 1.093e-01, 2.452e-01, -6.538e-02, 7.022e-02, 4.870e-02, -1.239e-01, 1.271e-02, 4.693e-03, 1.197e-01, 5.333e-02, -6.442e-02, 6.737e-02, 6.474e-02, -1.860e-01));
	r += mul(s0_5, M4(1.886e-01, 1.379e-01, 7.388e-02, 9.951e-02, 5.822e-02, 9.415e-03, 1.483e-01, 9.551e-02, -4.265e-02, 7.647e-02, -7.110e-02, 1.301e-01, -4.740e-02, 2.135e-02, -3.256e-02, 7.272e-02));
	r += mul(s0_6, M4(1.471e-02, -2.215e-02, 1.118e-02, -4.224e-03, -9.526e-03, 1.056e-01, 6.225e-02, 6.163e-03, -1.199e-02, 3.090e-02, 1.100e-01, 4.367e-02, -6.287e-02, 2.110e-03, 4.653e-02, 2.634e-03));
	r += mul(s0_7, M4(-9.156e-02, 6.332e-02, 8.221e-02, -2.532e-02, 9.286e-02, 1.789e-01, -7.587e-02, 3.588e-03, 1.320e-01, 3.023e-02, -7.919e-02, 9.182e-03, 4.106e-02, 1.427e-02, -1.481e-01, -3.329e-02));
	r += mul(s0_8, M4(-8.374e-02, 5.366e-02, -2.426e-01, 4.147e-03, 5.590e-02, -5.014e-02, -7.904e-03, -8.100e-02, -5.368e-02, 7.723e-03, -2.304e-02, 6.514e-02, 2.844e-03, 3.521e-02, -1.951e-02, 9.246e-03));
	// Terms for s1_0 .. s1_8.
	r += mul(s1_0, M4(3.039e-02, -1.293e-03, 1.293e-01, -3.167e-02, -1.130e-01, -6.094e-02, -1.905e-01, 9.723e-02, 6.267e-02, 4.243e-06, 2.944e-01, -8.186e-02, 1.090e-01, 1.586e-02, 1.518e-01, -8.095e-02));
	r += mul(s1_1, M4(6.135e-02, 6.516e-02, -7.528e-02, -4.670e-02, -2.456e-01, -5.538e-02, 1.169e-01, -2.047e-01, 1.181e-02, 1.002e-02, 2.542e-01, -4.795e-01, 2.283e-01, -1.144e-01, -1.208e-01, 2.578e-02));
	r += mul(s1_2, M4(5.741e-02, 8.183e-02, -8.066e-02, 9.448e-02, 7.245e-02, 4.110e-02, 7.466e-02, 1.324e-01, 1.988e-01, -2.446e-01, 2.133e-01, -2.613e-02, -2.349e-02, -1.161e-01, 6.522e-02, 1.124e-01));
	r += mul(s1_3, M4(-9.236e-02, -9.255e-02, -1.040e-01, 4.370e-02, 7.208e-02, 1.923e-01, 1.063e-02, 7.156e-02, -6.318e-02, 2.311e-01, -1.186e-01, -5.819e-02, 1.928e-01, 1.622e-01, 1.470e-01, 1.048e-01));
	r += mul(s1_4, M4(-6.268e-02, -1.375e-01, 6.604e-03, 2.802e-02, -3.740e-02, -5.399e-02, -1.374e-01, 2.602e-01, -1.768e-02, 3.827e-01, 6.387e-01, 2.928e-02, 5.753e-02, 1.206e-01, 3.682e-03, 4.942e-02));
	r += mul(s1_5, M4(4.952e-02, 2.523e-01, 3.339e-02, 3.963e-02, -3.530e-01, -3.700e-01, -1.063e-01, -1.958e-01, 1.963e-01, -6.659e-02, 3.456e-01, -3.005e-02, -1.509e-01, -1.022e-01, 1.169e-01, -4.150e-02));
	r += mul(s1_6, M4(-1.231e-01, 1.060e-01, 1.328e-02, -8.564e-02, 3.125e-01, 2.742e-03, -7.321e-02, -5.935e-03, -5.441e-02, 2.168e-01, 2.433e-01, -1.589e-01, 1.178e-01, -1.255e-01, 8.176e-02, -8.035e-02));
	r += mul(s1_7, M4(-8.170e-02, 1.597e-01, 1.886e-01, -7.304e-02, 3.819e-01, 7.933e-02, 1.686e-01, -2.394e-02, -7.509e-02, -2.816e-02, 3.992e-02, -3.778e-02, 3.477e-02, -5.884e-02, -7.473e-02, -1.785e-03));
	r += mul(s1_8, M4(-7.482e-02, 1.184e-02, -3.933e-03, -4.874e-02, 6.113e-02, 3.012e-01, 3.709e-02, 1.862e-01, 5.347e-04, -1.135e-01, -8.545e-02, -7.592e-02, 1.704e-02, 1.547e-02, 1.187e-01, -1.872e-02));
	// Terms for s2_0 .. s2_8.
	r += mul(s2_0, M4(-1.486e-01, 8.789e-02, 2.623e-02, 1.376e-01, -1.004e-03, 6.926e-02, 4.750e-02, -7.964e-03, 5.269e-02, -4.034e-02, 1.346e-01, 4.845e-03, -1.978e-02, -2.621e-02, -6.480e-02, -3.674e-02));
	r += mul(s2_1, M4(1.335e-01, 2.026e-01, -1.693e-01, -1.756e-01, -1.606e-01, 1.774e-01, -9.483e-03, -2.651e-02, -1.382e-01, -2.495e-01, 5.076e-03, -1.924e-02, -1.548e-02, 3.372e-02, 6.016e-02, -4.393e-02));
	r += mul(s2_2, M4(5.384e-02, -4.307e-02, 3.152e-02, 3.141e-02, 2.630e-01, 1.072e-02, 7.661e-02, -8.139e-03, -1.391e-01, 6.246e-02, -7.113e-02, -5.639e-02, -4.102e-02, 1.321e-02, -3.045e-02, -4.878e-02));
	r += mul(s2_3, M4(-5.485e-02, -5.021e-02, -9.393e-02, 2.505e-01, 6.772e-02, 3.911e-02, -1.625e-01, 6.379e-02, -7.047e-02, -1.624e-02, 8.427e-02, -9.974e-02, -2.270e-02, -3.061e-02, 2.650e-01, -1.225e-01));
	r += mul(s2_4, M4(1.345e-01, -3.003e-01, 8.125e-02, -7.746e-02, -8.684e-02, 1.411e-01, 9.097e-02, -1.195e-01, -7.510e-02, 1.460e-01, -2.291e-01, -1.175e-01, -4.250e-02, 1.536e-01, -3.261e-02, -4.076e-01));
	r += mul(s2_5, M4(-1.581e-01, 3.753e-02, -1.418e-02, -6.127e-02, 5.118e-02, 2.163e-02, -2.048e-02, -2.313e-02, -1.666e-01, 7.394e-02, -1.726e-01, -1.422e-01, -1.639e-01, 2.895e-01, -4.619e-02, -6.550e-02));
	r += mul(s2_6, M4(8.274e-02, -1.434e-01, 1.279e-02, -5.499e-02, 3.480e-02, -9.788e-02, -1.559e-02, -4.615e-02, -1.374e-02, 2.087e-02, 1.207e-02, 1.965e-02, -7.874e-02, 2.656e-02, -1.384e-01, 5.413e-03));
	r += mul(s2_7, M4(-1.188e-01, -1.006e-01, -3.870e-03, 1.726e-02, -4.400e-02, -4.391e-02, 1.943e-02, -7.128e-02, -2.829e-02, -7.835e-02, -1.709e-02, 1.198e-03, -9.185e-02, 1.434e-01, -4.201e-02, 1.719e-02));
	r += mul(s2_8, M4(-2.822e-02, -3.034e-02, 8.413e-03, 9.900e-03, -5.806e-02, 5.050e-03, -2.474e-02, 2.117e-04, -6.470e-02, 2.178e-02, -2.448e-02, -3.711e-02, -1.022e-01, 7.474e-02, -8.927e-02, 3.808e-03));
	// Terms for s3_0 .. s3_8.
	r += mul(s3_0, M4(4.649e-02, 5.995e-02, 1.237e-01, -4.439e-02, -8.079e-02, 4.739e-02, -1.330e-01, -7.111e-02, 1.249e-01, -1.390e-02, -4.347e-02, 3.089e-02, -1.378e-02, -5.054e-02, -2.371e-01, 1.214e-01));
	r += mul(s3_1, M4(-4.147e-02, 1.312e-01, 8.521e-02, 4.791e-02, -2.893e-01, -8.516e-03, 3.991e-02, 4.734e-03, 4.495e-02, -1.611e-01, -2.962e-02, 7.655e-02, 3.432e-03, -8.433e-02, 4.505e-02, -6.860e-02));
	r += mul(s3_2, M4(1.496e-01, -7.197e-03, 1.138e-01, 2.208e-02, 1.407e-01, 8.353e-02, 2.982e-02, -5.375e-02, 1.311e-01, 1.746e-02, -9.200e-02, 1.702e-01, -1.306e-01, -2.512e-02, 4.415e-03, 2.814e-02));
	r += mul(s3_3, M4(-1.853e-02, 1.384e-02, -5.118e-02, -4.419e-02, 2.638e-02, -6.450e-02, 2.930e-02, 1.899e-02, 4.168e-02, -2.827e-02, 2.166e-01, 1.744e-02, 2.126e-01, -4.719e-02, 7.233e-02, 1.602e-01));
	r += mul(s3_4, M4(-2.018e-01, -1.064e-01, 2.128e-02, -2.395e-02, -7.627e-02, 1.624e-01, -1.261e-02, -7.677e-02, -1.227e-01, 1.087e-01, -1.174e-01, -1.210e-01, 6.392e-02, 2.524e-01, 1.421e-02, 5.939e-03));
	r += mul(s3_5, M4(4.596e-02, -1.440e-01, -8.835e-02, -4.192e-02, -7.037e-02, -5.660e-02, -2.150e-02, 5.382e-02, -6.105e-02, 2.803e-02, -7.108e-02, 7.229e-02, 3.941e-02, 1.201e-01, 3.529e-02, 1.408e-01));
	r += mul(s3_6, M4(-1.068e-01, -4.231e-02, -4.821e-02, -1.200e-02, -2.450e-02, -9.650e-03, -2.444e-02, 6.018e-02, -1.616e-02, -2.735e-02, 7.351e-03, 3.997e-02, 1.775e-01, -4.294e-03, 8.998e-02, 4.438e-02));
	r += mul(s3_7, M4(-6.566e-02, -3.504e-02, 5.708e-02, 1.989e-02, -1.345e-02, -1.144e-01, -3.381e-02, 3.762e-03, 5.950e-02, -5.932e-02, -1.283e-01, 1.091e-01, -6.164e-02, -7.551e-02, -1.460e-01, -1.032e-01));
	r += mul(s3_8, M4(-3.855e-02, 4.966e-02, -7.798e-02, -4.355e-02, -7.390e-02, 8.112e-02, 1.543e-02, 9.386e-03, 5.591e-02, 1.782e-02, 6.727e-02, 5.322e-02, 4.568e-02, 9.473e-02, 1.551e-01, 1.565e-01));
	// Terms for s4_0 .. s4_8.
	r += mul(s4_0, M4(1.207e-02, -4.134e-02, -8.656e-02, 1.051e-01, 5.978e-02, -5.619e-02, 8.259e-02, -3.851e-02, 5.958e-02, 3.760e-02, 3.731e-03, 9.856e-02, -4.378e-02, -2.418e-02, -1.357e-01, 4.911e-02));
	r += mul(s4_1, M4(-6.349e-02, 5.498e-03, 1.302e-02, 4.400e-02, -1.769e-01, 1.710e-02, 5.431e-02, -1.440e-01, 8.952e-02, 7.151e-02, -3.314e-02, 6.503e-02, -4.302e-02, -1.076e-02, 5.917e-02, 1.234e-01));
	r += mul(s4_2, M4(-9.742e-03, 7.909e-02, 1.068e-01, -1.500e-01, -6.214e-02, 1.684e-01, -7.083e-03, 1.061e-01, 6.843e-02, -3.233e-02, 3.288e-03, 1.763e-01, 6.327e-02, -5.016e-02, -3.696e-02, -2.885e-02));
	r += mul(s4_3, M4(7.711e-02, -9.057e-02, -6.094e-02, -3.825e-02, -2.414e-01, -4.039e-02, -1.211e-01, 1.822e-01, -4.986e-02, 8.437e-02, 6.417e-02, -3.619e-02, 6.501e-02, 3.774e-02, -1.829e-02, 8.399e-02));
	r += mul(s4_4, M4(-2.235e-01, -4.193e-02, 6.100e-02, 9.313e-02, 5.158e-02, 1.821e-02, -2.817e-01, -2.336e-02, -2.633e-02, 1.295e-01, -1.070e-01, -6.050e-02, -1.249e-01, 4.648e-02, 5.849e-02, 7.926e-02));
	r += mul(s4_5, M4(-1.228e-01, 8.444e-02, 1.364e-02, 1.304e-02, 1.845e-02, 2.311e-01, -6.098e-02, 1.697e-01, -2.870e-02, 3.777e-02, 7.205e-02, -1.859e-01, -1.704e-02, 1.537e-02, -2.451e-02, -7.949e-02));
	r += mul(s4_6, M4(5.451e-02, -3.614e-02, -1.951e-02, -5.074e-03, 5.978e-02, -1.053e-01, 9.244e-02, -2.224e-02, -3.829e-02, -6.109e-02, 9.426e-02, -4.299e-02, 3.332e-02, 4.194e-02, -7.992e-02, 2.305e-02));
	r += mul(s4_7, M4(-7.507e-02, 3.732e-02, 9.090e-02, 2.122e-02, -3.735e-03, -1.434e-01, 1.082e-01, 4.114e-02, -5.953e-02, -7.751e-02, -1.033e-01, -2.877e-02, -1.485e-01, -4.885e-02, 8.432e-02, 4.760e-02));
	r += mul(s4_8, M4(-1.455e-03, -9.294e-03, -8.645e-02, -2.720e-02, 4.855e-02, 3.842e-02, -5.364e-02, -5.806e-02, -2.382e-02, -2.905e-02, 8.819e-02, -1.782e-02, 2.207e-02, -1.173e-02, -2.117e-02, 4.590e-02));
	// Terms for s5_0 .. s5_8.
	r += mul(s5_0, M4(-8.125e-03, -1.201e-01, -3.500e-02, 1.288e-01, -1.329e-02, -4.118e-02, 1.373e-01, -7.228e-02, -4.887e-02, 3.610e-02, -2.157e-02, -9.036e-02, -9.072e-02, 6.122e-02, 1.467e-01, 5.007e-02));
	r += mul(s5_1, M4(-5.586e-02, -3.752e-02, -8.767e-02, 4.407e-02, -6.290e-02, 4.581e-02, -5.733e-02, -9.619e-02, -6.788e-02, -6.172e-02, -6.335e-03, 2.470e-02, 7.034e-02, 7.370e-02, 1.988e-01, 1.529e-01));
	r += mul(s5_2, M4(-6.664e-02, 2.605e-02, 1.183e-01, -1.122e-01, -1.632e-02, 4.579e-02, -4.336e-02, -2.472e-02, 1.734e-01, -2.186e-01, -4.677e-02, 1.248e-01, -3.843e-02, 9.578e-02, 1.781e-01, 1.108e-01));
	r += mul(s5_3, M4(1.859e-02, 2.767e-02, -3.291e-02, 8.182e-02, -1.983e-01, 2.496e-02, 7.179e-02, -1.044e-02, -5.468e-02, 1.339e-02, -6.350e-02, -2.690e-02, -1.548e-01, 5.006e-02, 4.520e-02, 5.536e-02));
	r += mul(s5_4, M4(4.791e-02, -2.237e-01, 3.843e-02, 2.922e-01, 4.297e-02, -5.274e-02, -1.429e-01, -1.418e-02, 1.950e-01, -1.983e-02, 6.589e-02, -4.016e-02, -2.981e-02, 1.265e-01, -5.908e-02, 3.204e-01));
	r += mul(s5_5, M4(-8.215e-02, -8.358e-03, 5.427e-02, 1.616e-01, 2.650e-01, 2.579e-01, 1.048e-01, 1.796e-01, 1.994e-01, -1.162e-01, 3.958e-02, -4.143e-02, -1.218e-02, -2.132e-01, 3.810e-02, -1.137e-01));
	r += mul(s5_6, M4(-8.306e-02, -1.655e-02, 9.988e-02, 4.427e-02, 5.594e-02, -1.272e-01, 4.672e-02, -2.217e-02, -9.166e-02, -7.092e-03, -6.258e-02, -3.021e-02, 2.103e-02, 9.653e-04, -2.749e-01, 1.407e-02));
	r += mul(s5_7, M4(-4.161e-02, -4.165e-02, 3.618e-02, -9.598e-03, -5.146e-02, -3.152e-02, -1.016e-01, -9.250e-03, -7.838e-02, -1.316e-01, -1.805e-01, -4.386e-02, -1.398e-01, -2.552e-01, 2.408e-01, 9.776e-02));
	r += mul(s5_8, M4(-4.489e-02, -4.643e-02, -6.028e-02, 6.739e-02, -6.689e-02, -1.980e-01, -1.361e-02, -2.730e-02, -8.366e-02, -2.995e-02, -7.422e-02, -8.726e-02, 9.930e-02, 9.294e-02, 2.463e-01, 1.307e-01));
	// Terms for s6_0 .. s6_8.
	r += mul(s6_0, M4(-1.925e-02, -1.450e-01, 1.278e-01, -3.691e-02, 3.996e-02, 2.192e-02, 2.472e-02, 1.158e-01, -7.544e-02, -1.339e-02, -3.826e-02, -3.638e-02, 7.539e-02, -4.897e-03, 4.112e-03, 6.074e-02));
	r += mul(s6_1, M4(7.575e-02, -7.417e-02, -3.193e-02, -4.582e-03, -1.736e-01, 1.612e-01, 1.551e-01, 2.777e-01, 1.157e-01, 5.545e-02, -5.521e-02, -1.460e-01, 1.296e-01, 5.881e-02, 1.068e-02, 2.062e-01));
	r += mul(s6_2, M4(7.444e-02, -8.126e-02, -3.648e-02, 1.020e-01, 8.436e-02, -9.793e-02, 6.097e-02, 6.469e-02, 2.770e-03, 1.760e-02, 1.403e-01, -2.313e-02, -2.332e-02, 1.045e-01, 5.235e-02, 3.027e-02));
	r += mul(s6_3, M4(2.360e-02, 5.155e-03, 2.159e-01, -4.357e-02, -1.689e-01, 2.565e-02, -1.304e-01, 8.094e-02, -8.436e-02, -5.728e-04, -4.061e-02, -1.199e-02, 2.252e-01, -1.218e-01, -4.930e-02, 4.136e-02));
	r += mul(s6_4, M4(2.147e-01, -9.630e-02, -1.378e-01, -1.134e-02, 8.536e-02, 1.351e-02, -2.152e-01, 2.169e-01, -1.158e-01, -2.056e-01, 1.360e-01, -6.122e-02, 1.519e-01, -4.620e-02, 1.102e-01, -4.739e-02));
	r += mul(s6_5, M4(1.090e-01, -6.277e-02, -8.943e-02, 2.391e-01, -2.288e-01, -4.373e-01, -1.907e-01, -8.421e-02, 1.734e-02, -1.778e-01, 3.346e-02, 2.713e-02, 1.243e-01, 2.066e-01, -1.519e-01, -1.352e-01));
	r += mul(s6_6, M4(1.169e-02, 4.047e-03, -5.656e-02, 5.895e-02, 9.585e-02, 1.338e-02, -1.428e-01, 3.144e-02, -3.333e-02, -1.187e-01, 1.244e-01, 1.784e-02, 2.415e-01, -1.440e-01, 1.009e-01, -7.098e-02));
	r += mul(s6_7, M4(7.430e-02, 2.219e-03, -9.441e-03, -3.378e-02, 2.002e-01, -3.325e-02, 1.963e-02, -4.829e-02, -7.793e-02, 5.002e-02, -6.652e-02, -1.204e-02, -6.962e-02, 3.672e-02, -7.999e-02, -3.428e-02));
	r += mul(s6_8, M4(2.586e-02, -1.384e-01, 1.233e-01, 6.076e-03, -1.840e-02, 5.091e-02, 1.198e-01, 4.472e-02, -5.558e-02, 9.491e-02, 6.196e-03, -9.158e-02, 6.259e-02, 8.428e-02, -1.255e-01, 4.307e-02));
	// Terms for s7_0 .. s7_8.
	r += mul(s7_0, M4(-3.222e-02, 3.138e-03, 1.957e-02, 1.499e-02, -4.444e-02, -2.183e-02, 1.365e-01, -8.917e-02, 2.109e-02, 8.503e-02, 1.513e-01, -1.106e-02, -5.235e-02, -2.710e-02, -7.498e-02, 4.757e-02));
	r += mul(s7_1, M4(5.725e-02, -2.067e-03, 3.363e-02, -6.695e-03, -4.120e-02, 1.073e-01, 7.809e-02, 1.221e-01, 1.075e-01, -4.413e-03, -2.130e-01, -6.235e-02, 1.317e-01, -5.886e-02, -1.093e-01, 1.391e-01));
	r += mul(s7_2, M4(1.989e-02, -1.940e-03, 1.291e-01, 2.503e-02, -1.334e-01, -1.339e-01, 1.157e-01, 2.046e-02, 1.673e-01, -1.449e-02, 5.462e-02, 1.793e-02, 3.130e-02, -2.726e-02, 2.497e-02, 1.924e-02));
	r += mul(s7_3, M4(-3.377e-02, 5.555e-02, 1.141e-01, -3.961e-02, -1.033e-01, 6.960e-02, -1.337e-02, 2.128e-02, -3.006e-01, 7.817e-02, -2.368e-01, -2.991e-02, -3.921e-02, 1.059e-01, 2.200e-01, -1.236e-01));
	r += mul(s7_4, M4(1.268e-01, 1.573e-01, -1.810e-01, 5.189e-02, 9.665e-02, 1.684e-01, -9.649e-02, 1.212e-01, -3.799e-01, -1.285e-01, 2.667e-01, -1.329e-01, 1.717e-01, 1.509e-01, -2.089e-01, 1.262e-02));
	r += mul(s7_5, M4(6.720e-02, 5.172e-02, -1.574e-02, 9.391e-02, -2.402e-01, -2.259e-01, -4.062e-02, -1.388e-01, -1.521e-01, -2.795e-02, -7.224e-03, 3.382e-02, -7.645e-04, 4.380e-02, 1.791e-02, 5.836e-03));
	r += mul(s7_6, M4(-5.870e-02, 1.127e-01, -2.018e-02, 1.996e-02, 2.143e-01, 6.391e-02, 2.659e-02, 5.426e-02, -4.221e-03, 2.226e-02, -2.242e-02, -6.617e-02, -1.122e-01, 9.153e-02, 5.476e-02, -9.256e-02));
	r += mul(s7_7, M4(-1.343e-01, 2.115e-02, -1.554e-01, -3.678e-02, 9.672e-02, -4.869e-02, -4.701e-02, 3.799e-02, -2.664e-01, 1.993e-01, -5.260e-02, -7.176e-02, 6.625e-02, 5.332e-03, -1.422e-01, -5.649e-02));
	r += mul(s7_8, M4(-7.533e-02, -2.946e-02, 1.724e-01, 8.496e-03, 2.760e-02, 1.191e-02, -7.346e-02, -2.989e-02, 1.546e-02, -3.697e-02, -4.831e-02, -5.810e-04, -4.130e-02, -7.427e-02, 1.663e-01, -1.218e-01));
	// Constant per-component offset added after all 72 products
	// (presumably the bias for these four outputs - confirm with the generator).
	r += V4(2.410e-02, -9.396e-03, 1.051e-02, 5.262e-02);
	return r;
}

// f3: computes one 4-component output vector of this layer from 72 input
// vectors (8 groups s0..s7, each with 9 taps _0.._8).
//
// The result is the sum over all inputs of mul(s<g>_<k>, M4(...)) -- each
// input vector multiplied by its own constant 4x4 weight matrix -- plus a
// constant 4-component bias added at the end. The matrices and the bias are
// generated constants; do not edit them by hand.
//
// Pass5 calls this function with the tap order
// (-1,-1), (0,-1), (1,-1), (-1,0), (0,0), (1,0), (-1,1), (0,1), (1,1)
// for suffixes _0.._8 and stores the returned value in t3.
V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) {
	// Accumulator for the weighted sum; starts at zero.
	V4 r = 0.0;
	// Group s0: nine taps, one weight matrix per tap.
	r += mul(s0_0, M4(-1.541e-01, 7.445e-02, -1.031e-01, -4.227e-02, 1.194e-01, 1.177e-01, 4.318e-03, 3.377e-02, -1.042e-01, -7.155e-02, 3.097e-02, 3.587e-03, 2.435e-02, 1.159e-01, 1.653e-02, 4.330e-02));
	r += mul(s0_1, M4(9.426e-02, 1.593e-02, -1.154e-02, 2.807e-01, -1.165e-01, -5.208e-02, 1.036e-01, -3.098e-02, 1.813e-02, 2.485e-02, -7.865e-02, -2.436e-01, -4.745e-02, 1.809e-01, 2.603e-02, -3.831e-02));
	r += mul(s0_2, M4(2.777e-02, -1.637e-02, -7.339e-03, 1.470e-01, 4.922e-02, 3.150e-02, 2.567e-02, 8.265e-03, 5.751e-02, 2.533e-02, -3.575e-02, 8.372e-02, 1.036e-01, 3.502e-02, -3.228e-04, -4.643e-02));
	r += mul(s0_3, M4(4.781e-02, 2.210e-01, -5.909e-04, -4.151e-03, 1.515e-01, -1.007e-01, 6.112e-02, -5.742e-02, -1.989e-01, -8.581e-02, -7.817e-02, 1.420e-02, -8.560e-03, 5.586e-02, -6.567e-02, -4.390e-02));
	r += mul(s0_4, M4(-5.837e-02, 3.538e-02, -5.611e-02, -8.464e-02, -8.826e-02, -9.334e-02, 1.638e-01, -9.774e-02, -4.502e-02, 2.860e-02, 4.700e-02, -4.664e-02, -1.113e-01, -1.132e-01, -6.476e-02, -2.240e-02));
	r += mul(s0_5, M4(-1.893e-02, 1.075e-01, -1.443e-01, 4.658e-02, -3.577e-02, -1.106e-01, -1.077e-01, -5.052e-02, -1.149e-02, -4.290e-02, 3.050e-02, 1.243e-01, -9.772e-02, -3.819e-02, -1.082e-01, -3.553e-02));
	r += mul(s0_6, M4(2.176e-02, -1.894e-01, -7.480e-02, 1.118e-01, 4.854e-02, 1.391e-02, 5.312e-02, -6.790e-02, -1.328e-02, -2.371e-02, -5.487e-02, -3.335e-02, -4.248e-02, 1.991e-02, -4.070e-02, 3.184e-02));
	r += mul(s0_7, M4(-4.676e-02, 1.857e-01, 1.548e-01, -8.722e-02, 2.913e-02, -2.723e-02, -7.639e-02, -3.793e-02, 1.133e-01, -1.049e-01, -6.420e-02, -5.949e-03, -1.141e-01, -1.956e-02, 5.989e-02, -1.345e-01));
	r += mul(s0_8, M4(3.326e-02, -1.111e-01, 1.462e-01, 8.195e-02, 6.368e-02, -1.687e-03, -1.765e-02, 4.541e-02, 1.443e-01, 1.527e-01, 2.521e-02, -6.235e-02, -7.915e-02, -4.487e-02, -5.134e-02, 3.652e-02));
	// Group s1.
	r += mul(s1_0, M4(3.368e-02, 8.714e-02, 6.110e-03, -1.981e-01, 2.033e-01, -8.898e-02, 8.475e-03, 1.873e-01, -1.498e-01, 4.971e-02, 1.536e-01, 3.002e-01, -2.681e-01, -3.378e-02, 1.997e-02, 4.818e-01));
	r += mul(s1_1, M4(1.055e-01, 1.301e-01, -2.522e-02, -7.098e-02, -1.269e-01, 2.670e-02, 1.136e-01, 7.816e-02, -1.890e-01, 2.785e-01, -2.470e-01, -8.519e-02, -2.315e-02, 1.341e-01, -9.506e-02, 1.278e-01));
	r += mul(s1_2, M4(1.948e-02, -3.190e-02, -3.223e-02, 3.931e-02, 3.433e-02, 1.312e-01, -1.393e-02, -1.037e-01, 8.951e-02, 1.129e-01, 6.835e-02, -9.325e-02, 1.195e-01, -3.151e-02, -1.330e-02, 5.604e-03));
	r += mul(s1_3, M4(9.086e-02, 1.209e-01, 6.033e-02, -1.874e-02, -1.190e-01, -1.473e-01, 8.160e-02, 5.619e-02, -3.533e-01, -1.393e-01, -2.688e-01, 1.894e-01, 3.564e-02, 2.016e-01, -1.385e-01, 4.488e-02));
	r += mul(s1_4, M4(-9.440e-03, -1.706e-02, -1.748e-02, 2.662e-02, 4.748e-02, 1.932e-02, 1.696e-01, 2.769e-01, -5.026e-01, 2.010e-01, 9.951e-02, 1.637e-01, 1.957e-01, -3.575e-01, -1.192e-01, 1.617e-01));
	r += mul(s1_5, M4(-3.517e-03, -4.071e-02, -8.419e-02, 1.006e-01, -1.733e-01, 2.983e-01, -1.682e-01, 5.680e-02, -5.220e-02, -1.610e-01, -2.310e-01, -1.312e-02, 1.825e-02, -3.759e-02, -1.022e-01, -5.093e-02));
	r += mul(s1_6, M4(-8.118e-02, -5.037e-03, 1.736e-03, -3.371e-02, -1.289e-02, 8.800e-03, 9.251e-02, -1.756e-01, -1.530e-02, 2.300e-01, 5.308e-02, -1.295e-01, 6.294e-02, 1.396e-01, -3.647e-02, 2.122e-03));
	r += mul(s1_7, M4(3.921e-02, 1.880e-04, 7.184e-02, -5.317e-03, -1.052e-01, 1.530e-01, -2.644e-01, -1.468e-02, 3.993e-02, -4.042e-03, 6.291e-02, -3.918e-02, -2.002e-01, -2.985e-02, -3.781e-02, 1.375e-02));
	r += mul(s1_8, M4(-8.215e-02, 9.509e-03, 2.562e-02, -2.062e-02, 1.324e-01, 1.454e-02, 3.688e-02, 1.111e-01, 2.343e-01, -4.873e-02, 1.626e-01, -6.006e-02, -1.030e-01, 7.297e-02, -2.219e-02, 9.851e-02));
	// Group s2.
	r += mul(s2_0, M4(5.611e-02, -3.181e-01, -4.837e-02, -6.922e-02, 2.914e-03, 5.952e-02, -4.679e-02, -2.544e-02, 2.193e-02, -1.003e-01, -9.020e-03, 2.205e-02, -1.759e-03, 3.894e-02, 1.423e-02, -8.472e-02));
	r += mul(s2_1, M4(-7.214e-02, 1.584e-02, -4.465e-02, 2.169e-01, -3.078e-02, 5.434e-02, -3.933e-02, 1.729e-03, -1.555e-02, 5.433e-02, 2.206e-02, 4.131e-02, -8.110e-02, -1.106e-01, -4.616e-02, -2.315e-02));
	r += mul(s2_2, M4(1.754e-01, -5.070e-02, -3.150e-02, 7.899e-02, 5.769e-02, -2.129e-01, -4.636e-02, 4.046e-02, -1.395e-01, 4.660e-02, 2.975e-02, 1.117e-02, -6.117e-02, -1.751e-02, -2.903e-02, 6.764e-02));
	r += mul(s2_3, M4(-6.471e-02, 2.146e-01, 5.085e-02, -5.202e-01, -1.359e-02, 5.058e-02, -7.127e-02, -1.601e-02, -6.091e-02, -1.459e-02, -7.134e-02, 1.549e-01, 1.276e-01, -9.287e-02, 1.771e-01, 1.394e-01));
	r += mul(s2_4, M4(1.596e-01, -1.380e-01, -2.423e-01, 2.340e-01, 1.000e-02, 2.824e-02, 1.236e-01, 3.541e-02, -1.289e-01, 2.382e-02, -6.086e-02, 2.541e-02, 5.685e-03, -3.143e-02, 5.591e-02, -1.216e-01));
	r += mul(s2_5, M4(-8.766e-02, -2.235e-02, -2.451e-02, 1.227e-02, 1.550e-01, -5.607e-02, 7.935e-02, 8.179e-02, -2.942e-01, 1.411e-01, -1.799e-02, -3.496e-02, -2.725e-02, 1.133e-01, 5.287e-02, 1.074e-01));
	r += mul(s2_6, M4(-1.036e-01, 1.293e-01, 5.427e-02, -3.299e-01, -6.639e-02, 2.028e-02, -6.833e-02, 4.915e-02, -4.676e-03, 4.487e-02, -5.957e-03, 7.035e-02, -7.784e-02, 5.093e-02, 1.658e-01, 1.278e-02));
	r += mul(s2_7, M4(3.254e-02, -6.731e-02, 6.136e-02, -6.006e-02, -4.211e-03, 1.189e-01, 7.107e-02, -5.250e-02, 1.580e-01, 1.173e-01, 7.031e-02, -2.363e-02, 4.882e-02, -6.122e-02, 3.727e-02, 1.939e-02));
	r += mul(s2_8, M4(2.800e-02, -2.528e-02, 4.430e-02, -3.989e-02, -3.861e-02, -7.836e-02, -5.991e-02, -1.907e-02, -1.333e-01, 3.260e-02, -6.408e-03, 5.615e-02, 2.487e-02, 3.189e-02, 7.691e-02, -1.030e-01));
	// Group s3.
	r += mul(s3_0, M4(-7.688e-02, 4.350e-02, -1.354e-01, -5.353e-02, 1.306e-01, 1.329e-01, -4.048e-02, -8.876e-02, 1.484e-02, -2.443e-01, 3.756e-03, 7.480e-02, -8.744e-02, 8.263e-02, -2.327e-02, 1.993e-02));
	r += mul(s3_1, M4(1.341e-02, -4.843e-02, 1.961e-02, 1.442e-01, 1.704e-01, 8.335e-02, -9.952e-02, 1.931e-02, -5.787e-02, -2.613e-01, 2.198e-02, 6.073e-02, -1.278e-02, -2.314e-01, 3.446e-02, 2.892e-02));
	r += mul(s3_2, M4(1.312e-02, -1.539e-02, -6.587e-02, 3.002e-02, 1.623e-01, -1.527e-01, -2.507e-02, 8.886e-02, -3.306e-03, 1.216e-01, -5.373e-02, -4.630e-02, 2.683e-02, 1.528e-03, 1.237e-01, -1.357e-01));
	r += mul(s3_3, M4(-3.629e-02, -1.790e-02, 8.143e-02, 3.422e-02, -7.654e-02, -1.413e-01, 1.523e-02, -1.196e-01, 5.257e-02, -1.080e-01, -3.571e-03, -1.339e-01, 1.751e-02, 4.219e-02, 9.139e-02, 1.905e-01));
	r += mul(s3_4, M4(-1.332e-01, 1.085e-01, -5.291e-02, 1.711e-02, -1.511e-01, -1.611e-02, 6.475e-02, -1.778e-01, 1.864e-02, 1.446e-03, -9.403e-03, -1.638e-01, -6.173e-02, -1.837e-01, -7.164e-02, 9.682e-02));
	r += mul(s3_5, M4(4.124e-02, -5.000e-02, -3.391e-02, -5.335e-02, 1.619e-01, 4.778e-02, 1.230e-01, -9.066e-02, 2.046e-01, 2.125e-02, -5.583e-02, -2.781e-01, 2.121e-01, 4.222e-02, 5.072e-02, -6.791e-02));
	r += mul(s3_6, M4(5.591e-02, -4.172e-02, 5.074e-03, 6.773e-02, 7.004e-02, 3.783e-03, 1.241e-02, -3.087e-02, 5.125e-02, -2.546e-02, -6.424e-02, -1.288e-01, -6.226e-02, -1.023e-01, -1.236e-01, -1.059e-01));
	r += mul(s3_7, M4(6.663e-02, 2.416e-02, -5.159e-02, -3.987e-02, -1.621e-02, 1.979e-02, 4.159e-02, -6.761e-02, -3.534e-02, -1.029e-01, -1.345e-02, -4.275e-02, -2.683e-01, 1.552e-01, -2.544e-01, 2.859e-04));
	r += mul(s3_8, M4(-1.992e-02, 5.768e-02, -5.584e-03, 6.441e-03, 1.317e-01, 3.139e-02, 6.948e-02, 1.451e-02, 1.074e-01, -7.998e-02, -8.032e-02, -6.475e-02, 1.482e-01, -9.720e-03, -5.256e-02, -1.479e-02));
	// Group s4.
	r += mul(s4_0, M4(-4.839e-02, 1.140e-03, -3.622e-02, 7.300e-02, 4.100e-02, -1.103e-01, 3.436e-02, -2.778e-02, -9.181e-02, -6.152e-02, -4.272e-02, 1.237e-01, 1.008e-02, 6.290e-02, -8.303e-03, 9.899e-03));
	r += mul(s4_1, M4(-6.312e-02, -8.452e-02, 7.491e-03, 9.689e-02, -6.351e-02, -1.581e-01, -5.564e-02, -7.967e-03, -3.090e-02, -3.347e-02, 9.113e-02, 1.925e-01, 4.861e-02, -2.838e-02, 4.783e-02, -1.699e-01));
	r += mul(s4_2, M4(-1.478e-01, 4.556e-02, -5.336e-02, -6.732e-03, -2.761e-03, -5.113e-02, -1.607e-03, 1.340e-01, 1.012e-01, 2.428e-02, 1.503e-02, -7.795e-02, 6.548e-02, -7.697e-02, 1.112e-02, 3.049e-03));
	r += mul(s4_3, M4(-2.407e-03, -2.252e-03, -4.581e-02, 5.584e-02, 5.979e-03, -4.358e-02, 6.233e-02, 1.063e-02, -2.296e-01, -1.326e-01, -4.987e-02, 1.989e-02, 1.359e-01, 2.509e-01, 1.292e-01, 7.777e-02));
	r += mul(s4_4, M4(-2.050e-01, 1.206e-01, 3.710e-02, 1.986e-01, -1.741e-01, 7.256e-02, -3.484e-02, -3.576e-01, 2.451e-01, -1.688e-01, -1.057e-01, 9.107e-02, 8.758e-02, -2.751e-01, 2.239e-01, 2.364e-02));
	r += mul(s4_5, M4(-5.809e-02, 1.454e-02, -3.366e-02, -2.671e-02, -9.982e-02, -1.511e-02, -1.628e-01, -1.030e-01, -6.869e-02, 1.522e-01, 1.662e-01, -3.735e-02, 1.431e-01, 3.946e-03, 1.875e-01, -6.482e-02));
	r += mul(s4_6, M4(3.218e-02, -7.122e-02, -1.460e-02, 7.275e-02, 8.113e-02, -3.292e-03, -5.182e-02, 1.175e-01, 1.257e-02, 5.358e-02, -3.594e-02, 5.446e-02, 7.923e-02, 3.842e-02, 2.085e-01, 7.095e-02));
	r += mul(s4_7, M4(1.590e-02, 1.359e-01, 3.769e-03, 4.688e-02, 4.924e-02, 1.446e-01, 1.228e-01, -2.817e-02, -1.074e-01, 1.347e-01, 2.297e-02, 2.567e-02, 5.231e-02, -3.432e-03, 3.995e-01, -2.672e-02));
	r += mul(s4_8, M4(-1.036e-01, -3.753e-02, 7.380e-02, -7.441e-03, 2.090e-04, 7.063e-02, 1.665e-01, 6.673e-03, 9.839e-02, 1.731e-02, -1.267e-01, 2.546e-02, 1.349e-01, -7.099e-02, 8.893e-03, -3.966e-02));
	// Group s5.
	r += mul(s5_0, M4(-3.046e-02, -1.849e-01, -1.706e-02, -1.224e-01, -5.873e-03, -1.910e-01, 5.894e-02, 1.852e-01, 8.535e-03, 8.984e-02, -5.644e-02, -1.735e-01, -2.286e-01, 1.058e-01, -3.983e-02, 2.355e-02));
	r += mul(s5_1, M4(2.592e-02, 6.491e-03, 1.929e-02, -7.618e-02, -1.963e-01, -1.009e-01, 6.043e-02, -1.961e-01, -1.542e-02, 5.331e-04, 8.130e-02, 1.849e-01, -2.411e-01, 1.440e-01, -7.786e-02, 3.432e-02));
	r += mul(s5_2, M4(-8.275e-03, 3.609e-03, 9.891e-03, -8.344e-02, 2.447e-02, 1.084e-01, 9.394e-02, -1.679e-02, -6.476e-02, 4.520e-02, 9.260e-02, 2.126e-01, -1.950e-01, 9.232e-02, -3.969e-02, 6.067e-02));
	r += mul(s5_3, M4(-7.232e-04, -6.282e-02, 5.756e-02, -3.110e-02, -1.114e-01, -7.194e-02, 4.142e-03, 1.695e-01, 7.497e-02, -2.395e-02, 6.252e-02, 1.579e-02, -1.255e-01, 2.525e-01, 5.575e-02, 6.153e-01));
	r += mul(s5_4, M4(-6.491e-02, 7.502e-02, 3.167e-03, -2.348e-02, -8.472e-02, 2.548e-01, -4.978e-02, -1.951e-02, -3.048e-02, -5.704e-02, 1.774e-01, -6.338e-03, -2.838e-01, -1.511e-01, -4.053e-02, 6.059e-01));
	r += mul(s5_5, M4(-2.412e-02, -1.205e-01, -6.613e-02, -5.165e-02, -8.040e-02, -4.929e-02, -1.725e-01, -3.018e-02, -1.233e-01, -9.717e-03, 5.918e-02, 1.565e-01, -1.531e-02, 5.785e-02, 1.118e-01, -3.537e-02));
	r += mul(s5_6, M4(3.586e-02, -4.831e-02, -1.700e-02, -1.002e-01, 5.432e-02, -1.102e-02, -2.840e-02, 1.737e-01, 3.152e-03, 1.527e-01, 4.713e-03, 7.891e-03, -1.070e-01, -7.131e-02, -3.445e-02, 7.807e-02));
	r += mul(s5_7, M4(-6.315e-02, 1.014e-01, 1.688e-01, -1.030e-01, 1.241e-01, 7.856e-02, 8.263e-02, 2.699e-02, -3.012e-02, 2.876e-02, 1.430e-01, -1.694e-01, -8.311e-02, -5.530e-02, -2.528e-01, 1.123e-01));
	r += mul(s5_8, M4(4.501e-02, 7.023e-02, 9.987e-02, 1.779e-02, 2.821e-02, -7.071e-02, 8.364e-02, -4.817e-02, 1.017e-01, -1.175e-01, 6.084e-02, -2.026e-02, -4.104e-02, 1.192e-01, -2.724e-01, -7.172e-02));
	// Group s6.
	r += mul(s6_0, M4(-2.403e-02, -1.430e-01, -4.463e-02, 2.115e-01, -3.311e-03, 1.294e-01, 2.786e-02, 6.036e-02, -3.302e-02, -3.810e-02, 1.050e-02, 9.657e-03, -6.527e-02, -3.026e-02, -1.425e-02, -2.182e-01));
	r += mul(s6_1, M4(-1.318e-01, 2.104e-02, -1.155e-01, 1.466e-01, -1.479e-01, 3.399e-01, 4.071e-02, -4.176e-02, -1.956e-02, -4.514e-02, -1.132e-03, -3.509e-04, 4.392e-02, -4.815e-03, -5.243e-02, -3.786e-02));
	r += mul(s6_2, M4(1.311e-02, -1.032e-02, -8.790e-03, 1.187e-01, -5.755e-02, -1.255e-01, -1.005e-01, -1.453e-02, -1.102e-02, -4.807e-02, 1.795e-02, -1.851e-01, -9.856e-02, 1.487e-01, -7.258e-02, -5.214e-03));
	r += mul(s6_3, M4(-4.654e-02, -3.135e-02, -1.684e-01, 3.834e-01, -2.876e-02, 1.784e-01, -1.386e-02, -4.142e-02, -8.756e-02, -2.283e-01, -9.441e-02, -4.160e-02, 2.263e-01, 2.885e-01, -3.452e-03, -2.400e-01));
	r += mul(s6_4, M4(7.687e-02, -6.810e-02, -1.320e-01, 2.525e-01, 1.797e-01, -2.430e-02, 1.885e-02, -6.292e-02, -3.197e-02, -7.428e-03, -2.033e-02, 1.679e-01, 6.859e-02, 7.989e-02, -7.397e-02, -2.282e-01));
	r += mul(s6_5, M4(4.482e-02, -7.678e-02, 1.278e-03, 2.601e-01, 4.335e-03, 1.209e-01, 3.467e-03, -2.305e-01, 1.498e-01, 8.959e-02, 3.266e-02, 1.003e-01, -1.326e-01, -3.894e-02, -6.801e-02, 5.746e-02));
	r += mul(s6_6, M4(5.336e-02, -9.724e-02, -1.431e-01, 2.915e-01, 1.202e-01, -1.028e-01, -8.011e-03, 1.251e-01, 4.663e-02, -5.798e-02, -8.256e-02, 2.405e-02, -5.388e-02, -8.614e-02, -7.355e-02, -3.901e-02));
	r += mul(s6_7, M4(-3.570e-02, 2.925e-01, -7.481e-02, 2.625e-02, 1.195e-01, 4.382e-02, -3.380e-02, 1.320e-02, -6.054e-02, 1.930e-01, 1.641e-01, -5.457e-02, -6.494e-02, 4.445e-02, 1.441e-01, 4.620e-02));
	r += mul(s6_8, M4(9.954e-02, 1.499e-01, 3.501e-02, 1.605e-02, -2.730e-01, 1.298e-01, -4.405e-02, -2.582e-02, -5.153e-03, -1.636e-02, -3.186e-02, -1.102e-01, 8.378e-02, -5.718e-02, 1.241e-02, 2.242e-02));
	// Group s7.
	r += mul(s7_0, M4(4.522e-03, 1.157e-01, 6.721e-02, -1.100e-01, -3.192e-02, 1.623e-02, -1.048e-02, 4.959e-02, -8.910e-02, -8.295e-02, 3.892e-03, 7.422e-02, 7.690e-02, 1.323e-02, 1.388e-01, 3.940e-02));
	r += mul(s7_1, M4(-6.403e-03, 9.173e-02, 7.153e-03, -1.033e-01, -6.463e-02, 6.393e-02, -4.339e-02, -6.190e-03, -1.071e-01, 7.056e-02, 5.481e-02, 6.483e-02, -6.428e-02, 2.973e-02, 1.292e-01, 1.169e-01));
	r += mul(s7_2, M4(5.770e-02, 5.588e-02, 6.031e-02, -1.759e-01, -3.895e-02, 2.467e-03, -2.560e-02, -1.027e-01, 2.629e-02, -1.205e-01, -9.231e-02, -2.143e-01, -2.209e-02, 7.736e-02, 1.911e-02, -6.816e-02));
	r += mul(s7_3, M4(1.705e-02, 5.705e-02, -4.597e-02, -7.820e-02, 8.144e-03, 6.708e-02, -3.695e-02, 2.001e-01, -1.255e-01, -1.630e-02, -2.811e-02, -5.311e-03, 1.111e-01, 6.014e-02, 1.773e-02, 8.124e-02));
	r += mul(s7_4, M4(1.531e-02, -1.537e-02, -9.494e-02, -6.804e-02, 1.393e-01, -6.602e-02, 4.988e-02, 2.267e-01, -3.947e-02, 8.325e-02, -8.862e-02, -6.870e-02, 1.717e-01, -2.222e-01, 4.594e-02, -1.655e-01));
	r += mul(s7_5, M4(1.169e-01, 1.878e-02, 4.611e-02, 2.927e-02, 7.513e-02, 4.478e-02, 8.064e-02, 2.155e-02, 2.792e-01, -1.029e-01, 9.900e-02, -3.829e-02, -1.461e-02, -5.504e-02, -6.803e-02, -1.202e-02));
	r += mul(s7_6, M4(3.775e-02, -4.092e-02, -9.230e-02, 3.931e-02, -2.544e-02, -5.744e-02, -1.008e-01, 8.357e-03, 4.594e-03, -5.746e-03, -1.147e-01, -3.240e-02, 2.209e-02, -7.544e-02, 8.800e-02, 3.604e-02));
	r += mul(s7_7, M4(7.549e-02, -3.910e-02, -5.900e-02, -2.941e-03, -8.728e-02, -4.351e-02, -7.503e-02, 1.369e-01, -4.670e-02, 2.395e-01, 2.080e-01, -6.757e-02, 1.841e-02, 7.237e-03, -4.694e-02, 1.169e-02));
	r += mul(s7_8, M4(7.998e-02, -1.301e-02, 8.638e-02, -2.254e-01, -1.370e-01, -5.376e-02, -3.425e-02, -2.080e-03, -1.539e-01, -1.300e-01, 4.326e-02, -3.162e-02, 3.087e-03, 1.125e-01, 8.557e-02, -6.775e-02));
	// Constant bias added after all weighted terms.
	r += V4(-1.190e-02, 2.995e-03, -4.483e-02, 1.950e-02);
	return r;
}

// Pass5: entry point for this pass. For the pixel handled by this thread it
// reads a 3x3 neighbourhood through each of the l0..l3 macros, splits every
// sample into its positive and negative parts, and writes the results of
// f0..f3 over those 72 vectors to t0..t3.
//
// blockStart: added to the per-thread offset to form the pixel coordinate.
// tid:        thread id; only tid.x is used.
//
// NOTE(review): the l0..l3 macros in effect here are defined before this
// function, outside this excerpt; they presumably expand to O(...), which
// reads the locals `pos` and `pt` by name -- do not rename those locals
// without checking the macros.
void Pass5(uint2 blockStart, uint3 tid) {
	// NOTE(review): GetInputPt() presumably returns the size of one input
	// texel in normalized texture coordinates -- confirm against Magpie.
	float2 pt = float2(GetInputPt());
	// NOTE(review): Rmp8x8 presumably maps the flat thread index to an (x, y)
	// offset inside an 8x8 block -- confirm against Magpie.
	uint2 gxy = Rmp8x8(tid.x) + blockStart;
	uint2 size = GetInputSize();
	// Threads whose pixel falls outside the input size do nothing.
	if (gxy.x >= size.x || gxy.y >= size.y) {
		return;
	}
	// Pixel centre (gxy + 0.5) scaled by pt.
	float2 pos = (gxy + 0.5) * pt;

	// 3x3 taps from l0, ordered row by row from (-1,-1) to (1,1).
	V4 s0_0 = l0(-1.0, -1.0);
	V4 s0_1 = l0(0.0, -1.0);
	V4 s0_2 = l0(1.0, -1.0);
	V4 s0_3 = l0(-1.0, 0.0);
	V4 s0_4 = l0(0.0, 0.0);
	V4 s0_5 = l0(1.0, 0.0);
	V4 s0_6 = l0(-1.0, 1.0);
	V4 s0_7 = l0(0.0, 1.0);
	V4 s0_8 = l0(1.0, 1.0);
	// s1_* keeps the negative part of each s0_* sample (zero where the sample
	// is positive). This must run before s0_* is overwritten below.
	V4 s1_0 = -max(-s0_0, 0.0);
	V4 s1_1 = -max(-s0_1, 0.0);
	V4 s1_2 = -max(-s0_2, 0.0);
	V4 s1_3 = -max(-s0_3, 0.0);
	V4 s1_4 = -max(-s0_4, 0.0);
	V4 s1_5 = -max(-s0_5, 0.0);
	V4 s1_6 = -max(-s0_6, 0.0);
	V4 s1_7 = -max(-s0_7, 0.0);
	V4 s1_8 = -max(-s0_8, 0.0);
	// s0_* is reduced in place to its positive part.
	s0_0 = max(s0_0, 0.0);
	s0_1 = max(s0_1, 0.0);
	s0_2 = max(s0_2, 0.0);
	s0_3 = max(s0_3, 0.0);
	s0_4 = max(s0_4, 0.0);
	s0_5 = max(s0_5, 0.0);
	s0_6 = max(s0_6, 0.0);
	s0_7 = max(s0_7, 0.0);
	s0_8 = max(s0_8, 0.0);

	// Same pattern for l1: s2_* becomes the positive part, s3_* the negative part.
	V4 s2_0 = l1(-1.0, -1.0);
	V4 s2_1 = l1(0.0, -1.0);
	V4 s2_2 = l1(1.0, -1.0);
	V4 s2_3 = l1(-1.0, 0.0);
	V4 s2_4 = l1(0.0, 0.0);
	V4 s2_5 = l1(1.0, 0.0);
	V4 s2_6 = l1(-1.0, 1.0);
	V4 s2_7 = l1(0.0, 1.0);
	V4 s2_8 = l1(1.0, 1.0);
	V4 s3_0 = -max(-s2_0, 0.0);
	V4 s3_1 = -max(-s2_1, 0.0);
	V4 s3_2 = -max(-s2_2, 0.0);
	V4 s3_3 = -max(-s2_3, 0.0);
	V4 s3_4 = -max(-s2_4, 0.0);
	V4 s3_5 = -max(-s2_5, 0.0);
	V4 s3_6 = -max(-s2_6, 0.0);
	V4 s3_7 = -max(-s2_7, 0.0);
	V4 s3_8 = -max(-s2_8, 0.0);
	s2_0 = max(s2_0, 0.0);
	s2_1 = max(s2_1, 0.0);
	s2_2 = max(s2_2, 0.0);
	s2_3 = max(s2_3, 0.0);
	s2_4 = max(s2_4, 0.0);
	s2_5 = max(s2_5, 0.0);
	s2_6 = max(s2_6, 0.0);
	s2_7 = max(s2_7, 0.0);
	s2_8 = max(s2_8, 0.0);

	// Same pattern for l2: s4_* becomes the positive part, s5_* the negative part.
	V4 s4_0 = l2(-1.0, -1.0);
	V4 s4_1 = l2(0.0, -1.0);
	V4 s4_2 = l2(1.0, -1.0);
	V4 s4_3 = l2(-1.0, 0.0);
	V4 s4_4 = l2(0.0, 0.0);
	V4 s4_5 = l2(1.0, 0.0);
	V4 s4_6 = l2(-1.0, 1.0);
	V4 s4_7 = l2(0.0, 1.0);
	V4 s4_8 = l2(1.0, 1.0);
	V4 s5_0 = -max(-s4_0, 0.0);
	V4 s5_1 = -max(-s4_1, 0.0);
	V4 s5_2 = -max(-s4_2, 0.0);
	V4 s5_3 = -max(-s4_3, 0.0);
	V4 s5_4 = -max(-s4_4, 0.0);
	V4 s5_5 = -max(-s4_5, 0.0);
	V4 s5_6 = -max(-s4_6, 0.0);
	V4 s5_7 = -max(-s4_7, 0.0);
	V4 s5_8 = -max(-s4_8, 0.0);
	s4_0 = max(s4_0, 0.0);
	s4_1 = max(s4_1, 0.0);
	s4_2 = max(s4_2, 0.0);
	s4_3 = max(s4_3, 0.0);
	s4_4 = max(s4_4, 0.0);
	s4_5 = max(s4_5, 0.0);
	s4_6 = max(s4_6, 0.0);
	s4_7 = max(s4_7, 0.0);
	s4_8 = max(s4_8, 0.0);

	// Same pattern for l3: s6_* becomes the positive part, s7_* the negative part.
	V4 s6_0 = l3(-1.0, -1.0);
	V4 s6_1 = l3(0.0, -1.0);
	V4 s6_2 = l3(1.0, -1.0);
	V4 s6_3 = l3(-1.0, 0.0);
	V4 s6_4 = l3(0.0, 0.0);
	V4 s6_5 = l3(1.0, 0.0);
	V4 s6_6 = l3(-1.0, 1.0);
	V4 s6_7 = l3(0.0, 1.0);
	V4 s6_8 = l3(1.0, 1.0);
	V4 s7_0 = -max(-s6_0, 0.0);
	V4 s7_1 = -max(-s6_1, 0.0);
	V4 s7_2 = -max(-s6_2, 0.0);
	V4 s7_3 = -max(-s6_3, 0.0);
	V4 s7_4 = -max(-s6_4, 0.0);
	V4 s7_5 = -max(-s6_5, 0.0);
	V4 s7_6 = -max(-s6_6, 0.0);
	V4 s7_7 = -max(-s6_7, 0.0);
	V4 s7_8 = -max(-s6_8, 0.0);
	s6_0 = max(s6_0, 0.0);
	s6_1 = max(s6_1, 0.0);
	s6_2 = max(s6_2, 0.0);
	s6_3 = max(s6_3, 0.0);
	s6_4 = max(s6_4, 0.0);
	s6_5 = max(s6_5, 0.0);
	s6_6 = max(s6_6, 0.0);
	s6_7 = max(s6_7, 0.0);
	s6_8 = max(s6_8, 0.0);

	// Each of f0..f3 receives the same 72 vectors and yields one V4, stored
	// at this pixel in t0..t3 respectively.
	t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8);
	t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8);
	t2[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8);
	t3[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8);
}

//!PASS 6
//!DESC conv5
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN t0, t1, t2, t3
//!OUT t4, t5, t6, t7

// Input taps for this pass: lN(x, y) samples texture tN through the O macro
// (point sampler SP, mip level 0) at pos + float2(x, y) * pt and converts the
// result to V4. O reads `pos` and `pt` from the scope where lN is used.
// These names are also defined for earlier passes with different sources.
#define l0(x, y) V4(O(t0, float2(x, y)))
#define l1(x, y) V4(O(t1, float2(x, y)))
#define l2(x, y) V4(O(t2, float2(x, y)))
#define l3(x, y) V4(O(t3, float2(x, y)))

V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) {
	V4 r = 0.0;
	r += mul(s0_0, M4(-3.325e-02, 3.102e-02, -1.201e-02, 2.268e-02, 7.444e-03, 1.479e-01, -1.168e-01, -5.885e-03, 4.910e-03, -2.434e-02, 4.692e-02, -8.707e-03, 1.163e-01, -5.718e-02, 4.571e-02, -3.987e-03));
	r += mul(s0_1, M4(9.745e-02, 4.481e-02, -5.736e-03, 4.527e-02, -6.175e-02, -8.090e-02, -1.783e-01, -4.490e-02, -1.074e-01, 6.241e-03, 1.121e-01, 5.417e-02, -1.167e-01, -1.013e-01, -9.217e-03, -8.213e-02));
	r += mul(s0_2, M4(-1.143e-01, -2.914e-02, 1.086e-02, 5.355e-02, -5.619e-02, 7.229e-02, -3.712e-02, -2.005e-02, 1.814e-02, 3.906e-02, 9.007e-02, 4.586e-02, 4.925e-02, -4.990e-02, 3.002e-02, -5.329e-02));
	r += mul(s0_3, M4(-7.281e-02, -1.456e-01, 6.030e-02, -7.220e-03, 1.217e-02, 3.942e-02, 2.493e-03, -2.184e-02, -4.370e-02, -2.313e-02, -3.702e-03, -1.439e-02, 5.491e-02, -1.097e-01, -1.525e-02, 1.306e-02));
	r += mul(s0_4, M4(2.689e-02, 6.599e-02, 1.036e-01, -8.721e-02, -2.337e-01, 1.000e-01, 1.439e-02, 2.684e-01, 2.232e-02, 7.914e-03, -3.002e-02, 7.379e-02, -2.289e-02, -1.065e-02, -4.016e-02, -1.150e-01));
	r += mul(s0_5, M4(7.046e-02, -1.307e-03, -1.191e-02, 4.801e-02, -2.147e-02, 1.351e-01, 3.794e-02, -3.068e-02, 3.729e-02, 1.114e-02, -1.309e-02, 1.434e-02, -1.162e-01, 1.812e-02, -5.316e-02, -5.238e-02));
	r += mul(s0_6, M4(-1.274e-03, 3.140e-02, 1.683e-02, 3.169e-02, -2.787e-03, -9.452e-02, 1.432e-02, -2.380e-02, -3.839e-02, 1.362e-01, -8.840e-03, 2.950e-02, -9.202e-02, -4.772e-02, 2.859e-02, 3.891e-02));
	r += mul(s0_7, M4(5.686e-02, 2.562e-02, 3.277e-02, -3.834e-03, -4.117e-02, -1.190e-03, 8.718e-02, -3.681e-02, 2.798e-02, 1.672e-02, 8.348e-02, 1.928e-02, 2.370e-02, 6.126e-02, 5.123e-02, -7.158e-02));
	r += mul(s0_8, M4(6.307e-04, 2.505e-02, -2.436e-03, -2.344e-02, -6.344e-02, 2.815e-02, -3.121e-02, 5.070e-02, 2.422e-02, 8.561e-03, 2.690e-02, -8.370e-02, 9.301e-03, -2.347e-02, -4.862e-02, -4.908e-02));
	r += mul(s1_0, M4(-2.105e-01, 5.742e-02, 5.282e-02, 1.075e-01, 8.419e-02, -8.720e-03, 3.498e-02, 8.101e-03, -4.217e-02, -6.896e-02, 1.060e-01, 1.892e-02, 1.353e-01, -5.279e-02, 3.427e-02, -1.061e-01));
	r += mul(s1_1, M4(-5.899e-02, -8.880e-02, 4.749e-02, 5.689e-02, -1.243e-01, -1.050e-01, 1.305e-01, -1.822e-01, -1.794e-01, -2.956e-01, 2.878e-01, -5.563e-02, 1.107e-01, -9.594e-02, 1.681e-01, -4.173e-01));
	r += mul(s1_2, M4(2.088e-01, -1.263e-02, 8.239e-02, 1.372e-01, -1.523e-01, -4.600e-02, -6.727e-02, 1.620e-02, -1.129e-01, -1.329e-01, -2.328e-02, -8.638e-02, -8.206e-02, -5.865e-02, 7.702e-02, 8.697e-02));
	r += mul(s1_3, M4(1.856e-03, -1.151e-01, 1.269e-01, -1.246e-01, -2.480e-02, 1.943e-02, -7.583e-02, -7.204e-02, 2.768e-02, 6.190e-02, 8.628e-02, -1.388e-01, 3.124e-02, -8.930e-02, -1.392e-01, 3.038e-01));
	r += mul(s1_4, M4(1.273e-01, -4.104e-02, 5.600e-01, -1.033e-01, -2.556e-01, 8.996e-02, -2.634e-02, 1.067e-02, -2.025e-01, 1.187e-01, 3.098e-01, -1.177e-01, 2.340e-01, 9.085e-02, -7.989e-02, -3.875e-01));
	r += mul(s1_5, M4(-4.001e-01, -2.410e-01, -2.005e-01, 3.053e-01, -4.187e-02, -2.079e-02, 3.795e-02, -5.801e-01, 1.572e-01, -3.704e-02, 1.789e-01, 3.634e-02, -2.765e-03, 3.068e-01, 6.566e-02, 1.782e-01));
	r += mul(s1_6, M4(-5.601e-02, -2.663e-01, -5.452e-02, -3.342e-02, 2.845e-02, -5.523e-02, 3.937e-02, -1.254e-02, -9.803e-02, -4.092e-02, 1.070e-01, -1.884e-02, -8.056e-02, 5.957e-02, -2.268e-01, -2.883e-02));
	r += mul(s1_7, M4(3.602e-02, 3.605e-01, -1.058e-01, -1.025e-01, 7.327e-02, 1.212e-02, 3.897e-02, -1.530e-03, -2.248e-01, 2.349e-01, 7.325e-02, -6.040e-02, 2.359e-01, 5.645e-01, 1.694e-02, -4.593e-02));
	r += mul(s1_8, M4(-6.717e-02, -2.712e-01, -7.268e-02, 4.477e-02, 5.479e-02, 2.246e-02, 3.345e-02, -7.484e-02, 4.180e-02, -4.276e-02, 2.712e-02, 1.372e-02, 6.067e-02, -6.065e-02, -1.066e-01, -1.852e-02));
	r += mul(s2_0, M4(-9.897e-02, -1.366e-01, 3.089e-02, -4.977e-03, 6.995e-02, 2.994e-01, 8.808e-02, -3.481e-02, -5.962e-02, -5.574e-02, -5.478e-02, -2.862e-02, 3.514e-02, 9.242e-02, -1.727e-02, -1.140e-02));
	r += mul(s2_1, M4(-2.045e-01, 1.060e-01, 1.197e-01, -7.549e-02, 2.766e-01, -3.171e-02, 8.807e-02, -1.171e-02, 8.605e-02, -1.305e-01, -1.057e-01, -8.969e-02, -1.155e-01, 3.084e-02, -2.138e-02, -5.441e-03));
	r += mul(s2_2, M4(-5.702e-02, -1.392e-02, 2.288e-02, 9.423e-02, 1.703e-02, -8.309e-02, 8.809e-03, -1.677e-01, -1.509e-02, -4.547e-02, 4.150e-02, -3.334e-02, -1.208e-01, 9.868e-02, -8.768e-02, -1.492e-02));
	r += mul(s2_3, M4(-1.112e-02, 1.112e-01, -3.933e-02, 2.128e-02, -2.588e-03, 3.539e-01, 3.261e-02, -4.850e-02, 5.691e-02, -1.492e-01, 4.581e-02, 1.560e-02, 1.683e-01, -2.399e-02, 9.759e-02, -8.359e-02));
	r += mul(s2_4, M4(-9.395e-02, 3.051e-02, -2.703e-01, -5.509e-02, -7.402e-02, 1.713e-01, -1.217e-02, -1.481e-01, -1.499e-01, 9.432e-02, 1.353e-01, 1.110e-01, 7.470e-02, -1.484e-01, 2.865e-02, 1.123e-01));
	r += mul(s2_5, M4(-4.374e-02, 2.185e-01, -1.399e-02, -3.567e-02, 1.530e-01, 2.002e-01, 1.431e-02, -2.271e-01, -1.001e-01, -9.959e-03, -2.941e-02, -3.313e-02, 4.434e-02, -1.691e-02, 3.744e-02, 2.550e-01));
	r += mul(s2_6, M4(-7.543e-02, -1.643e-02, 3.870e-02, -2.994e-02, -9.913e-02, -4.311e-02, -7.535e-02, 7.141e-02, -1.986e-02, -3.983e-02, -1.601e-02, 1.515e-02, 5.399e-02, -1.822e-02, -1.145e-01, -1.189e-01));
	r += mul(s2_7, M4(5.299e-02, 5.771e-02, -1.905e-02, 4.469e-04, -8.914e-03, 1.371e-01, -3.636e-02, -1.195e-03, -3.347e-02, -3.162e-02, -3.516e-02, -1.002e-01, -1.821e-01, 1.202e-01, 8.073e-02, 1.828e-02));
	r += mul(s2_8, M4(-3.079e-02, 4.782e-02, 4.581e-02, 3.428e-02, 1.364e-02, 3.328e-02, 1.065e-01, -5.447e-02, 1.521e-02, 1.987e-03, -5.060e-02, -4.637e-02, -1.284e-01, 1.990e-02, -5.804e-02, 5.293e-02));
	r += mul(s3_0, M4(-3.279e-02, -1.138e-01, -7.613e-02, 5.656e-02, -2.444e-02, -4.692e-02, 2.756e-02, 3.238e-02, -5.189e-03, 6.887e-04, -2.633e-02, -4.697e-02, -7.190e-02, 1.854e-01, 1.255e-01, -5.488e-02));
	r += mul(s3_1, M4(8.669e-02, -3.164e-02, 8.179e-02, -2.261e-02, 9.947e-02, 7.810e-02, -2.032e-02, 9.205e-03, 1.738e-01, 2.339e-02, -3.427e-02, -4.536e-02, 2.671e-01, 1.905e-01, -1.258e-01, -1.761e-01));
	r += mul(s3_2, M4(9.081e-02, 2.051e-02, -6.132e-03, -2.126e-02, -1.383e-01, -1.578e-02, -2.328e-02, 3.190e-03, 1.567e-01, -6.032e-02, 8.671e-02, 1.130e-02, -1.476e-01, 1.344e-01, -1.120e-01, -5.130e-02));
	r += mul(s3_3, M4(-2.872e-02, 1.763e-02, -7.216e-02, -1.072e-01, -1.180e-01, -1.004e-01, -4.798e-02, -2.352e-02, 1.105e-01, -2.879e-02, 1.592e-02, 6.813e-03, -4.098e-01, -1.040e-01, -2.934e-01, 3.858e-02));
	r += mul(s3_4, M4(8.946e-02, 9.835e-02, -2.144e-01, 5.372e-02, -2.239e-01, -2.255e-02, -1.035e-01, -1.412e-01, -2.732e-01, 1.072e-01, 3.488e-02, 1.192e-01, 3.397e-01, 2.329e-01, 4.348e-02, -1.439e-01));
	r += mul(s3_5, M4(-1.008e-01, -5.632e-02, -7.657e-03, -2.069e-02, -1.341e-01, -2.471e-02, 7.401e-02, 4.367e-03, -1.240e-02, 1.633e-01, -1.791e-03, -1.558e-01, -1.800e-01, -2.511e-01, 1.134e-01, -1.670e-02));
	r += mul(s3_6, M4(-3.769e-02, 9.544e-03, 4.993e-02, 3.764e-03, 3.193e-02, -9.158e-02, -9.055e-02, 4.805e-02, 6.795e-02, 5.456e-02, -1.025e-01, 1.730e-02, 1.168e-02, 1.310e-01, -3.507e-02, 4.382e-02));
	r += mul(s3_7, M4(-5.501e-02, -3.820e-02, 5.317e-02, -6.017e-02, 1.382e-01, 1.471e-01, -1.624e-01, 2.640e-02, 2.001e-02, 1.615e-01, -6.277e-02, -9.653e-02, -2.665e-01, 2.116e-01, -1.292e-02, -5.260e-02));
	r += mul(s3_8, M4(4.992e-02, 9.520e-02, -3.869e-02, 7.041e-02, -1.096e-01, -7.984e-02, 6.372e-02, 4.116e-02, -5.481e-02, 6.716e-02, -7.373e-02, 3.300e-02, -1.699e-01, 1.113e-01, -5.838e-02, 2.048e-02));
	r += mul(s4_0, M4(3.478e-03, 1.604e-01, 1.143e-02, 3.950e-02, 2.978e-02, -2.053e-02, 1.382e-02, -3.209e-02, -4.983e-02, -1.550e-01, 1.172e-02, -1.038e-01, 5.117e-02, -4.133e-02, 1.129e-01, 1.274e-02));
	r += mul(s4_1, M4(1.712e-01, -3.888e-02, -1.187e-01, 6.333e-02, 2.066e-02, 1.027e-01, -6.894e-02, -9.356e-02, -1.740e-01, 4.635e-02, 1.101e-01, -9.743e-02, -1.694e-01, -3.618e-02, 1.157e-01, 9.724e-03));
	r += mul(s4_2, M4(1.616e-01, -8.447e-03, -3.057e-02, -5.442e-02, -1.433e-01, -5.447e-02, -4.839e-02, 3.747e-02, -2.974e-02, -2.371e-02, 8.349e-03, -7.386e-02, 2.599e-03, 1.992e-02, 5.651e-02, -9.889e-02));
	r += mul(s4_3, M4(-1.205e-01, 4.766e-02, -1.792e-01, -7.001e-03, -1.023e-01, 2.760e-03, -1.750e-01, 1.432e-02, 2.140e-02, -1.442e-02, -1.203e-02, 5.236e-03, 2.750e-02, 5.747e-02, 4.160e-02, 1.839e-02));
	r += mul(s4_4, M4(1.175e-01, -6.460e-02, -1.678e-01, -1.293e-01, 4.098e-02, -1.212e-01, -9.402e-02, -1.696e-01, -1.637e-01, -2.059e-02, -3.352e-02, 1.008e-01, -2.280e-01, -1.991e-02, 1.460e-01, -5.383e-02));
	r += mul(s4_5, M4(-5.260e-02, -6.286e-02, -7.651e-02, -3.258e-01, -8.880e-02, -6.865e-02, -2.491e-02, 5.650e-02, 3.969e-03, 4.130e-05, 7.006e-02, -8.126e-02, 3.293e-04, 1.805e-02, 1.934e-01, -4.173e-02));
	r += mul(s4_6, M4(2.222e-02, 5.658e-02, -1.776e-03, 3.331e-02, 2.624e-02, 4.201e-02, -1.271e-01, 1.976e-02, -7.570e-02, -5.879e-02, 1.233e-01, -4.147e-02, 5.968e-02, -1.057e-02, -4.891e-02, -6.360e-02));
	r += mul(s4_7, M4(7.232e-02, -1.316e-01, 8.325e-02, -3.296e-02, 1.187e-01, 2.971e-02, 8.751e-02, -3.254e-02, -1.432e-01, -2.923e-02, 1.748e-01, 7.771e-02, 1.595e-01, 1.106e-01, -1.131e-01, 1.774e-02));
	r += mul(s4_8, M4(-9.671e-02, -5.734e-02, -5.968e-02, -3.762e-02, 1.582e-02, 1.257e-02, 3.137e-02, -7.635e-04, -7.236e-02, -3.294e-02, -3.009e-02, -7.280e-02, -7.171e-02, -4.889e-03, -2.667e-02, -4.693e-03));
	r += mul(s5_0, M4(9.525e-03, 2.583e-01, 1.577e-01, -8.960e-02, -1.342e-01, 5.619e-02, 7.415e-02, -5.099e-02, 1.083e-02, -1.041e-02, 7.983e-02, 5.332e-02, -7.776e-03, -6.704e-02, 1.509e-01, 4.181e-02));
	r += mul(s5_1, M4(-2.083e-01, 1.158e-01, -2.234e-02, 7.118e-02, 5.266e-02, 4.810e-02, 9.202e-02, -1.003e-01, -1.734e-01, 4.296e-02, 1.714e-01, -2.401e-03, -1.137e-01, -1.756e-01, 6.021e-02, -2.741e-02));
	r += mul(s5_2, M4(6.815e-02, 5.817e-02, -6.170e-02, -4.732e-02, 3.478e-02, -4.199e-03, -1.851e-02, -3.221e-03, 8.744e-02, 5.604e-02, 5.947e-02, -1.328e-01, 1.943e-02, -8.131e-03, 1.900e-02, -7.607e-02));
	r += mul(s5_3, M4(2.857e-01, 1.539e-01, 1.038e-01, 1.132e-02, 1.329e-02, 2.174e-01, 1.244e-02, 8.200e-02, -4.505e-02, -5.227e-02, 4.619e-04, 6.776e-02, -5.088e-02, -3.256e-02, 6.318e-02, -4.579e-02));
	r += mul(s5_4, M4(-4.660e-02, -2.435e-01, -6.781e-03, -7.809e-02, 1.874e-01, -1.164e-01, -1.782e-01, -2.800e-01, 2.698e-02, 6.071e-02, -5.110e-02, 7.245e-02, 1.245e-02, 1.899e-01, 1.096e-01, -5.294e-02));
	r += mul(s5_5, M4(8.890e-02, 9.728e-02, 4.997e-02, -9.887e-02, 5.615e-02, 2.021e-02, -2.259e-02, -1.068e-01, -3.950e-02, -8.120e-02, -1.665e-02, -4.129e-02, -8.374e-02, -1.258e-01, 1.182e-01, -1.213e-02));
	r += mul(s5_6, M4(-8.262e-03, 1.664e-02, -7.024e-02, -3.985e-02, -2.808e-02, 1.221e-01, -1.890e-01, -1.087e-01, 1.692e-02, -1.162e-02, 2.958e-02, 2.775e-02, -6.859e-02, -1.296e-02, 1.193e-02, 4.396e-03));
	r += mul(s5_7, M4(8.172e-02, 1.117e-01, -1.482e-02, 5.308e-03, -5.901e-03, -5.041e-02, 4.588e-02, -1.821e-02, 8.305e-02, 1.224e-01, -6.272e-03, 1.969e-02, 4.009e-02, -3.353e-02, 6.791e-02, -1.372e-02));
	r += mul(s5_8, M4(-1.109e-01, -1.571e-02, -4.466e-02, 9.630e-02, 7.569e-03, 6.390e-02, 2.270e-02, 7.346e-03, 2.162e-02, 6.602e-02, -7.577e-02, 3.169e-02, 6.660e-03, -7.800e-02, -5.085e-03, 7.740e-02));
	r += mul(s6_0, M4(-1.017e-01, -4.034e-02, 6.708e-02, 7.748e-02, -7.052e-02, -6.251e-02, 6.019e-02, -4.987e-03, 1.404e-01, 1.748e-01, -1.077e-01, 8.488e-02, -5.884e-03, -4.451e-03, 2.979e-02, -3.478e-02));
	r += mul(s6_1, M4(-2.065e-02, 2.125e-01, 1.827e-01, -1.059e-01, 5.760e-02, 7.237e-02, 6.810e-02, 8.762e-03, -8.153e-02, 8.979e-02, -9.872e-03, 7.730e-02, 9.600e-02, -1.581e-01, 9.862e-02, -8.078e-02));
	r += mul(s6_2, M4(1.839e-02, 8.069e-02, -5.765e-02, -3.410e-02, -1.856e-01, 3.381e-02, -4.610e-02, -1.308e-02, 7.696e-02, -3.619e-02, 1.082e-01, -1.086e-01, 8.181e-02, -3.576e-02, 8.041e-03, -1.460e-01));
	r += mul(s6_3, M4(-1.003e-01, 6.476e-02, 4.396e-02, -1.430e-02, -1.837e-02, 1.834e-01, 1.998e-02, 6.159e-02, -9.878e-02, 1.229e-01, -9.909e-02, -8.428e-02, 2.370e-02, -1.355e-01, 1.162e-02, -9.837e-03));
	r += mul(s6_4, M4(1.037e-01, 1.753e-01, -1.921e-01, -6.578e-02, 6.742e-02, 3.040e-02, 1.255e-01, 6.624e-02, 8.632e-02, 1.224e-02, -7.217e-03, 9.856e-02, 1.402e-01, 1.209e-01, 1.964e-01, -2.759e-02));
	r += mul(s6_5, M4(3.978e-02, 2.199e-02, -2.567e-02, -1.525e-01, -1.993e-01, 9.225e-02, -8.903e-02, -1.027e-01, 1.432e-01, 7.147e-02, 3.857e-03, -6.097e-02, 1.333e-01, -4.337e-02, -7.006e-02, -2.442e-01));
	r += mul(s6_6, M4(-1.520e-01, -1.692e-02, 1.714e-02, 5.022e-02, -9.391e-02, 5.515e-03, 4.718e-02, -1.328e-02, -2.968e-02, -6.351e-02, -5.077e-02, 7.981e-02, 5.259e-02, -1.407e-02, 3.420e-02, -9.187e-02));
	r += mul(s6_7, M4(4.626e-03, 7.643e-03, 7.219e-02, -4.271e-02, 7.871e-02, 8.244e-03, 2.424e-01, 9.938e-02, 1.456e-01, -4.297e-03, -7.914e-02, -4.582e-02, 1.354e-01, -1.694e-02, -8.773e-02, 1.764e-02));
	r += mul(s6_8, M4(-4.360e-02, 6.883e-02, -3.494e-02, -3.672e-02, -1.503e-01, 5.198e-02, -1.430e-02, 2.389e-02, -7.594e-02, -1.904e-02, 5.872e-02, 3.285e-02, 1.975e-01, 1.726e-01, -9.447e-02, -9.414e-02));
	r += mul(s7_0, M4(3.853e-02, -1.717e-01, 2.571e-02, -4.859e-03, 4.389e-02, -1.690e-01, 1.320e-02, -3.390e-02, -1.307e-01, 5.201e-02, 2.112e-01, -5.495e-02, -3.004e-02, 5.965e-02, -5.489e-03, 4.998e-02));
	r += mul(s7_1, M4(-1.303e-03, 5.981e-03, 4.903e-02, 7.560e-03, 1.147e-01, -3.206e-02, -7.700e-02, -9.505e-02, 6.375e-02, 1.431e-01, 2.929e-01, -1.937e-01, -4.918e-02, 3.872e-02, 1.358e-01, 4.230e-02));
	r += mul(s7_2, M4(-1.391e-01, -6.461e-02, -4.030e-02, -3.580e-02, 1.159e-03, -8.988e-02, -1.543e-02, -1.233e-01, 1.509e-01, -5.529e-02, 1.104e-01, 1.352e-01, -5.030e-02, 5.567e-02, 1.109e-01, -2.720e-03));
	r += mul(s7_3, M4(-2.481e-02, -1.004e-01, 1.421e-02, -5.236e-03, -8.489e-03, -6.281e-02, -1.650e-02, 1.920e-02, 1.945e-02, -5.418e-02, -6.112e-02, -7.400e-02, -7.176e-02, 2.431e-02, -3.867e-02, -7.052e-03));
	r += mul(s7_4, M4(-4.605e-02, -1.893e-02, -2.272e-01, -6.262e-02, 1.103e-01, -2.067e-01, 9.526e-02, 1.047e-01, -1.670e-01, -1.132e-01, 1.711e-01, 7.622e-03, -7.388e-02, 1.010e-02, -1.279e-02, 4.707e-02));
	r += mul(s7_5, M4(3.021e-02, 8.185e-02, -1.230e-01, 2.231e-01, -3.762e-02, -2.493e-01, -2.623e-02, -2.778e-01, 6.918e-02, 1.902e-02, -9.916e-02, -2.807e-01, -4.177e-02, 3.213e-02, -5.395e-02, 5.346e-02));
	r += mul(s7_6, M4(-3.677e-02, 8.425e-03, 1.583e-02, -7.588e-03, 3.335e-02, 4.774e-02, -1.190e-02, 1.936e-02, -5.956e-02, -2.931e-02, -6.092e-02, 6.113e-02, 8.702e-03, -2.472e-02, -7.609e-03, -7.800e-02));
	r += mul(s7_7, M4(-1.198e-01, 6.918e-02, -2.854e-02, 2.875e-02, 8.723e-02, -1.937e-01, 2.986e-02, 1.310e-01, -6.265e-02, -1.409e-01, -1.194e-01, 2.090e-01, -1.643e-02, -1.501e-01, 4.103e-02, -1.420e-02));
	r += mul(s7_8, M4(2.341e-02, 8.679e-02, 1.332e-02, 7.991e-02, 1.440e-01, -8.666e-02, 5.631e-02, -8.778e-02, 1.794e-01, 4.280e-02, -1.685e-02, -1.721e-01, -4.744e-02, 4.625e-02, -3.583e-02, -2.333e-02));
	r += V4(2.939e-02, 1.703e-02, -4.544e-03, -7.752e-03);
	return r;
}

// f1: returns one V4 (min16float4) built from 72 V4 inputs.
// Every argument sN_K (N = 0..7, K = 0..8) is passed as the first operand of
// mul() against its own constant M4 (min16float4x4) weight matrix, so it is
// treated as a row vector. The 72 products are summed into r in argument
// order, a constant V4 offset is added last, and the raw sum is returned
// (no activation or clamping is applied inside this function).
// NOTE(review): presumably N selects the source texture / channel group and
// K the tap position inside a 3x3 window -- confirm against the call site.
// The literals look machine-generated; do not edit or reorder them by hand,
// since the min16float accumulation order affects rounding.
V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) {
	// Accumulator; the scalar 0.0 initializes all four components.
	V4 r = 0.0;
	// Weighted contributions of s0_0 .. s7_8, one 4x4 weight matrix per input.
	r += mul(s0_0, M4(6.320e-02, -7.718e-02, 9.506e-02, 7.016e-03, -6.464e-02, 2.906e-03, -1.199e-01, 9.856e-02, -8.741e-02, 3.326e-02, 1.542e-02, -5.129e-02, 1.470e-01, -7.230e-02, -6.427e-02, 3.123e-02));
	r += mul(s0_1, M4(-8.588e-02, -2.614e-02, 8.184e-02, 7.694e-02, -5.604e-02, 5.216e-02, 1.357e-01, 2.437e-01, -1.033e-01, 7.084e-02, 4.441e-02, 4.397e-02, -3.856e-02, -7.256e-02, 2.395e-02, 4.535e-03));
	r += mul(s0_2, M4(-4.604e-02, 1.596e-02, 1.392e-01, 3.868e-02, 5.121e-02, 2.543e-01, 7.318e-02, 7.010e-02, 3.419e-02, 3.113e-02, -1.747e-02, -2.473e-02, -2.759e-02, -9.824e-02, -7.014e-02, -6.002e-02));
	r += mul(s0_3, M4(-3.614e-02, -1.616e-01, -5.425e-02, 8.711e-02, -7.508e-02, 1.317e-01, 3.801e-02, 3.961e-02, -2.620e-03, -1.130e-02, -2.620e-02, 5.976e-02, -7.145e-03, 1.283e-01, -7.578e-02, -7.914e-02));
	r += mul(s0_4, M4(4.839e-02, -1.199e-02, -3.252e-02, 5.731e-02, -1.909e-01, 1.175e-01, -1.192e-01, 1.086e-01, -9.094e-02, 1.143e-01, -3.654e-02, -3.028e-02, -8.647e-02, -6.826e-02, -1.610e-01, -3.038e-03));
	r += mul(s0_5, M4(-1.978e-02, -1.834e-02, -4.260e-02, -1.173e-01, 1.760e-01, 1.418e-01, 4.347e-02, -2.425e-01, 3.787e-02, -2.291e-02, 5.158e-02, 6.051e-03, 2.596e-02, 7.399e-02, 1.628e-02, 7.668e-02));
	r += mul(s0_6, M4(-1.158e-02, -3.459e-02, -7.090e-03, 6.518e-02, 1.804e-02, -4.394e-02, -1.789e-02, 5.996e-03, -6.698e-03, 4.131e-02, 1.149e-01, -8.381e-02, 7.473e-02, 1.978e-03, 9.873e-02, 8.531e-03));
	r += mul(s0_7, M4(3.496e-02, 8.744e-02, 4.371e-02, -1.088e-01, -3.036e-02, -5.697e-02, 1.940e-02, -6.022e-03, 6.581e-03, 3.124e-03, 1.142e-02, -8.218e-02, 3.751e-02, -1.171e-01, -7.349e-03, -1.270e-01));
	r += mul(s0_8, M4(4.895e-02, 2.241e-02, 5.956e-03, -3.883e-02, -7.192e-02, 1.757e-01, 4.466e-02, 4.012e-02, -3.673e-02, -1.307e-02, -2.441e-02, -9.896e-02, -2.868e-02, -1.131e-02, 4.646e-02, 9.386e-02));
	r += mul(s1_0, M4(-1.430e-03, 3.561e-02, -3.690e-01, 1.564e-01, 3.685e-02, 2.425e-03, -1.631e-01, -4.238e-02, -2.123e-01, -5.178e-02, 9.378e-02, -7.436e-02, 6.296e-02, -6.532e-02, -2.221e-01, 6.686e-01));
	r += mul(s1_1, M4(-9.776e-03, -4.756e-02, -5.781e-02, 2.344e-01, -4.454e-02, -1.328e-01, -4.029e-02, 1.455e-01, -2.053e-01, 1.001e-02, 1.591e-01, -2.491e-01, -2.265e-01, -2.036e-01, -3.604e-02, -6.287e-03));
	r += mul(s1_2, M4(1.567e-01, 1.016e-01, -1.516e-01, -7.148e-02, 2.769e-02, 5.141e-02, 2.809e-02, -5.084e-02, -1.296e-01, 3.819e-02, 3.261e-01, -2.187e-01, 1.355e-02, -2.216e-01, 4.251e-03, 2.603e-02));
	r += mul(s1_3, M4(-5.761e-02, 1.639e-01, 1.786e-01, -3.273e-01, 9.615e-02, 6.864e-02, 9.650e-02, -4.877e-02, -4.408e-02, 7.343e-03, 7.796e-02, -2.653e-01, 1.835e-01, 1.345e-01, 9.666e-02, 2.262e-01));
	r += mul(s1_4, M4(-3.391e-01, 4.119e-02, 2.529e-01, -2.811e-01, -2.786e-03, -1.297e-01, -8.726e-02, 7.554e-02, -3.214e-01, -4.229e-02, 6.652e-02, -3.961e-01, -5.312e-02, -1.620e-01, 1.159e-02, 2.044e-01));
	r += mul(s1_5, M4(-8.320e-02, 1.444e-01, 1.536e-01, -3.765e-01, 2.449e-02, -3.528e-02, -1.436e-01, -9.440e-02, -2.857e-01, -3.807e-01, 1.571e-01, -1.162e-01, 2.310e-02, 1.582e-01, 1.143e-01, -1.543e-02));
	r += mul(s1_6, M4(-5.282e-02, 1.744e-01, 4.193e-02, 2.067e-01, -1.906e-02, 6.041e-02, 4.649e-02, -4.463e-03, -6.232e-02, 4.702e-02, 1.993e-01, -3.678e-01, 2.422e-01, 1.648e-01, 1.056e-01, 3.540e-01));
	r += mul(s1_7, M4(-2.235e-01, 5.221e-01, 1.819e-01, 5.619e-02, -1.677e-02, -1.433e-01, 1.799e-02, 1.457e-02, -1.292e-01, 1.292e-01, 2.298e-01, -3.856e-01, 1.253e-01, -2.538e-01, -2.143e-01, -3.131e-01));
	r += mul(s1_8, M4(-1.091e-01, 6.784e-02, -2.271e-01, -5.712e-02, -8.358e-02, -2.816e-02, -1.511e-02, -9.424e-02, -4.637e-02, -1.751e-01, 1.128e-01, -1.316e-01, 3.199e-03, 3.003e-02, -2.507e-03, 3.708e-02));
	r += mul(s2_0, M4(7.022e-02, 9.826e-03, 1.264e-01, -6.655e-02, -9.567e-02, 7.824e-02, -1.140e-01, -6.869e-02, -4.578e-02, -4.380e-02, 8.145e-02, 1.228e-02, 2.101e-02, -2.336e-02, 4.873e-02, -4.936e-02));
	r += mul(s2_1, M4(9.575e-03, 5.262e-02, -9.150e-02, 4.189e-02, -7.853e-02, 2.990e-02, -4.065e-03, -1.226e-01, -3.636e-04, 1.089e-02, 9.353e-02, 1.230e-02, 6.342e-03, -4.481e-02, -3.678e-02, -3.514e-03));
	r += mul(s2_2, M4(1.176e-01, -1.579e-01, 4.680e-02, 2.423e-02, -1.349e-01, -2.077e-01, 3.620e-02, -1.201e-01, -2.024e-02, -6.755e-03, 2.079e-02, 4.859e-02, 1.734e-01, 8.149e-03, 1.137e-01, 2.593e-03));
	r += mul(s2_3, M4(1.332e-02, -1.541e-01, -5.517e-02, -2.061e-02, 7.641e-02, -4.840e-02, -1.162e-01, -3.291e-01, -1.245e-01, 3.492e-02, -2.600e-02, 1.613e-01, 9.448e-02, 1.412e-01, 2.535e-02, -1.406e-01));
	r += mul(s2_4, M4(2.151e-01, -1.365e-02, 1.180e-02, 2.581e-02, 6.572e-02, -1.372e-02, -1.722e-01, -1.926e-01, 4.826e-02, -1.078e-01, -1.183e-01, -7.610e-02, -7.766e-03, 4.998e-02, -1.190e-02, -1.360e-02));
	r += mul(s2_5, M4(2.103e-01, -1.463e-02, -1.856e-01, 2.490e-02, 2.971e-02, -1.128e-01, -2.542e-01, -1.356e-01, -8.917e-03, 6.293e-02, -1.829e-02, -5.433e-02, 8.434e-02, -1.365e-01, -2.178e-01, -9.137e-02));
	r += mul(s2_6, M4(3.370e-01, -2.690e-01, -1.512e-02, -1.231e-01, -5.368e-02, -6.936e-02, 5.311e-02, -3.567e-02, 7.326e-02, -7.379e-02, 3.780e-03, -4.926e-03, -5.118e-03, 4.603e-02, 1.331e-02, 7.783e-02));
	r += mul(s2_7, M4(1.102e-01, -4.355e-02, -1.842e-01, -4.951e-02, -2.790e-02, -2.210e-01, -4.385e-02, -1.147e-01, 3.262e-02, 2.523e-02, -3.362e-02, 5.086e-02, -7.375e-02, 5.865e-03, 7.601e-02, 1.326e-01));
	r += mul(s2_8, M4(2.669e-01, -2.111e-01, 7.244e-03, -7.698e-02, -2.844e-02, -4.207e-02, -3.568e-03, -6.819e-02, 3.351e-02, 2.784e-03, 2.415e-04, -2.362e-02, -2.661e-02, 2.854e-02, -2.674e-02, -6.056e-02));
	r += mul(s3_0, M4(3.955e-02, 2.142e-02, -2.566e-02, 1.080e-02, 4.278e-02, -7.498e-02, 3.332e-02, 1.456e-02, -3.917e-03, -3.820e-02, -1.247e-01, -6.262e-02, -2.027e-02, -1.745e-01, -1.151e-01, -1.639e-02));
	r += mul(s3_1, M4(5.990e-02, 4.835e-02, -1.038e-01, 7.007e-02, -2.788e-02, 2.322e-03, 1.116e-01, -9.015e-02, 4.873e-02, -7.391e-02, 1.696e-02, 4.602e-02, -2.696e-01, -1.999e-01, -1.103e-01, -1.714e-02));
	r += mul(s3_2, M4(3.210e-02, -8.543e-02, -8.054e-02, -1.211e-02, 1.079e-02, 3.980e-02, 6.774e-02, -3.586e-02, -5.813e-02, -9.956e-02, -1.907e-02, -8.042e-02, -1.100e-01, 1.353e-01, -4.048e-02, 2.285e-02));
	r += mul(s3_3, M4(-2.530e-02, -3.663e-02, 5.284e-02, -7.050e-02, -3.852e-02, 6.278e-03, -7.247e-02, -2.077e-02, 7.670e-04, 4.137e-02, 4.632e-02, 4.830e-02, 2.586e-01, 6.367e-02, -6.495e-02, -3.022e-02));
	r += mul(s3_4, M4(-2.026e-01, 1.004e-01, 2.051e-01, 1.364e-02, 1.316e-01, 9.292e-02, -5.945e-02, 1.091e-01, 1.033e-01, -1.929e-01, -1.421e-01, -5.142e-02, -1.676e-02, -2.549e-01, 2.412e-01, -3.018e-01));
	r += mul(s3_5, M4(-3.704e-02, -6.964e-02, 6.598e-03, 8.814e-02, 7.165e-02, -4.431e-02, -1.288e-01, -5.984e-02, 3.779e-02, -1.225e-01, -3.971e-03, 5.088e-02, -5.586e-02, -7.200e-02, 2.022e-02, -1.665e-01));
	r += mul(s3_6, M4(-1.237e-01, -1.838e-01, 1.462e-02, -7.791e-02, 1.130e-01, 6.404e-02, 5.346e-02, -4.299e-04, 1.870e-02, 4.440e-02, 3.492e-03, 1.038e-01, -6.858e-02, -2.071e-02, -2.566e-02, 4.679e-02));
	r += mul(s3_7, M4(-7.821e-02, 9.331e-02, -6.336e-02, 8.823e-02, -4.691e-03, -1.123e-02, -4.472e-02, 1.027e-01, -5.471e-02, 8.144e-02, 1.998e-02, 1.143e-01, -2.183e-01, 7.181e-02, -3.760e-02, -8.993e-02));
	r += mul(s3_8, M4(5.917e-02, -2.845e-02, -2.938e-02, -3.687e-02, -1.116e-01, -1.184e-01, 1.099e-01, 1.020e-01, 2.030e-03, 1.817e-03, -3.595e-02, 4.143e-04, -5.942e-02, 3.027e-02, -4.019e-02, 9.432e-02));
	r += mul(s4_0, M4(-4.261e-02, 1.766e-02, 9.293e-02, -5.081e-02, 9.748e-02, -6.530e-02, -1.216e-01, 1.010e-02, -2.167e-01, -7.010e-03, 2.974e-02, -4.901e-02, -2.646e-03, 1.033e-02, -6.713e-02, -1.442e-01));
	r += mul(s4_1, M4(7.373e-02, -6.465e-02, -7.503e-02, -7.127e-02, -1.255e-01, 1.340e-02, -1.245e-01, 3.496e-02, -1.546e-01, -3.533e-02, -5.367e-02, -8.579e-03, 1.241e-01, 1.731e-02, -1.479e-02, -2.206e-02));
	r += mul(s4_2, M4(5.265e-02, -7.941e-02, 5.017e-02, -2.362e-02, 2.740e-02, 6.718e-02, 1.580e-02, -4.146e-02, -5.366e-02, -3.145e-02, 1.035e-01, 2.084e-02, 1.680e-02, -9.646e-02, -1.065e-01, -1.834e-02));
	r += mul(s4_3, M4(8.164e-02, 7.382e-03, 5.676e-02, 7.940e-02, 8.327e-02, 7.134e-02, 1.251e-01, -1.103e-02, -1.549e-01, -6.383e-02, 1.144e-02, -7.174e-02, 3.976e-02, 8.730e-02, -1.003e-01, -5.632e-02));
	r += mul(s4_4, M4(1.269e-01, -1.364e-01, 2.985e-01, 4.161e-02, 3.140e-03, 9.006e-02, -1.225e-02, 2.627e-01, -3.744e-01, 4.081e-02, 9.455e-02, 3.477e-02, -9.757e-03, 8.997e-03, -2.210e-01, 2.177e-01));
	r += mul(s4_5, M4(2.005e-02, 7.934e-02, -8.341e-03, 5.906e-02, 4.050e-02, 8.305e-02, -6.599e-02, 4.685e-02, -3.776e-02, -1.510e-01, 3.013e-02, -1.222e-02, -1.127e-01, 1.073e-02, -1.633e-02, -1.180e-01));
	r += mul(s4_6, M4(-3.381e-02, -4.626e-02, 3.144e-02, -6.961e-02, 9.662e-02, -1.304e-02, -4.102e-02, 6.509e-02, -1.617e-01, -1.666e-02, 2.633e-02, 1.079e-01, 1.275e-01, 2.305e-01, 1.164e-01, 5.076e-02));
	r += mul(s4_7, M4(8.643e-02, -1.511e-01, -6.230e-02, -6.197e-02, -6.672e-02, 2.599e-02, -7.017e-02, 5.437e-02, -1.406e-01, 3.464e-02, -8.536e-02, -5.700e-02, -8.383e-02, 1.391e-01, 3.891e-01, 8.632e-02));
	r += mul(s4_8, M4(4.181e-02, -4.849e-02, -7.718e-02, 7.518e-02, 1.577e-03, -1.840e-02, -9.197e-03, -4.337e-02, -1.126e-01, 5.666e-02, 1.596e-03, -1.028e-01, -1.218e-01, 6.298e-02, 1.049e-01, 8.461e-02));
	r += mul(s5_0, M4(-1.387e-01, 4.819e-03, 3.064e-02, -1.743e-01, 1.076e-01, -4.823e-02, -4.510e-02, 4.724e-02, -9.852e-03, 2.309e-03, -2.033e-02, -1.944e-02, 2.860e-03, 2.271e-02, -4.729e-02, -9.371e-02));
	r += mul(s5_1, M4(-7.357e-02, -5.027e-02, -1.819e-01, -5.641e-02, 8.112e-02, -4.896e-02, -9.809e-02, 5.778e-03, 4.078e-02, -6.831e-02, -1.898e-01, -1.351e-02, 1.226e-01, 6.178e-02, -4.512e-02, -2.158e-02));
	r += mul(s5_2, M4(5.850e-02, -5.272e-02, 1.044e-01, -7.582e-02, 1.749e-02, 4.037e-02, 6.002e-02, -6.747e-03, 1.003e-01, -9.924e-02, -9.152e-02, 5.488e-03, -1.101e-02, -7.243e-02, -1.296e-01, 1.558e-02));
	r += mul(s5_3, M4(6.173e-02, 1.755e-01, -7.152e-02, -3.348e-01, 3.104e-01, -6.098e-02, 2.352e-01, -3.734e-01, 5.136e-02, 4.157e-02, 5.028e-02, -1.600e-01, -1.269e-01, 4.136e-02, -1.563e-02, -6.619e-02));
	r += mul(s5_4, M4(-1.144e-02, 2.076e-01, 6.527e-02, -1.160e-01, -1.525e-01, 8.878e-02, 2.254e-01, 1.486e-01, -1.173e-01, -2.311e-02, 3.828e-02, -3.514e-02, -1.886e-01, 1.043e-01, -1.510e-01, 7.074e-02));
	r += mul(s5_5, M4(2.749e-02, -6.254e-02, -1.639e-01, -2.591e-01, 7.234e-02, -1.603e-03, -9.239e-02, 4.716e-02, 2.100e-02, -1.405e-01, 1.463e-01, 8.715e-02, -1.764e-01, 1.204e-01, 3.718e-02, 3.813e-02));
	r += mul(s5_6, M4(2.766e-02, 1.623e-01, 1.378e-01, -1.562e-01, 1.616e-01, 8.169e-02, 5.208e-02, -1.288e-01, 6.320e-02, 9.176e-02, 1.180e-02, 1.137e-01, 8.061e-02, -1.175e-01, 6.469e-02, -1.322e-02));
	r += mul(s5_7, M4(-1.880e-01, 2.300e-02, 1.062e-01, 1.907e-02, -9.183e-02, -8.551e-02, 9.594e-02, 1.451e-01, 1.138e-01, 1.241e-01, -7.151e-02, 9.592e-02, 5.135e-02, -8.747e-02, -8.409e-02, -6.575e-02));
	r += mul(s5_8, M4(-2.020e-01, -1.719e-01, -6.394e-02, 1.819e-02, 8.297e-02, -9.173e-02, 5.302e-02, -9.164e-02, 1.000e-01, -1.038e-01, -8.130e-02, 1.412e-01, -1.411e-01, 1.092e-01, 2.095e-02, 9.288e-02));
	r += mul(s6_0, M4(-4.180e-02, -5.188e-02, 9.579e-02, -2.742e-02, -9.921e-04, 1.066e-02, 9.985e-02, -4.637e-02, 6.303e-02, -1.439e-02, 4.576e-02, -2.195e-02, 4.854e-02, -1.255e-01, -8.582e-03, 6.013e-02));
	r += mul(s6_1, M4(-3.422e-02, 2.045e-02, -4.093e-02, -1.336e-01, -8.530e-02, 5.189e-02, 1.427e-01, -1.012e-01, 1.486e-01, -1.828e-02, -9.215e-02, 2.158e-02, 1.942e-02, -1.541e-02, -1.068e-01, -3.169e-02));
	r += mul(s6_2, M4(-3.700e-02, -1.217e-01, 1.191e-01, -4.359e-03, -2.485e-02, 1.825e-02, 1.838e-01, -3.668e-02, 2.293e-02, 3.977e-02, -8.759e-02, 4.553e-03, -6.276e-02, -2.628e-02, -3.391e-02, 1.566e-04));
	r += mul(s6_3, M4(-1.024e-02, -2.777e-02, 4.128e-02, -6.782e-02, -3.726e-03, -5.963e-02, -9.420e-03, -9.402e-02, -8.151e-02, -9.414e-02, -3.991e-02, 7.724e-02, -7.746e-02, -7.503e-02, 7.464e-02, -8.905e-02));
	r += mul(s6_4, M4(-2.709e-01, 2.423e-01, 2.496e-01, 2.597e-03, -1.579e-01, 2.001e-01, -1.406e-01, -7.105e-02, 3.040e-01, -3.878e-02, -1.402e-01, -6.004e-02, -8.634e-02, -1.154e-01, 7.337e-02, -5.099e-02));
	r += mul(s6_5, M4(8.580e-02, -1.532e-01, 5.976e-02, 2.598e-01, -1.812e-02, 2.478e-01, -1.218e-02, 1.008e-01, -3.848e-02, -3.495e-02, 1.092e-03, -1.450e-01, -9.653e-02, -9.008e-02, 4.896e-02, 7.095e-02));
	r += mul(s6_6, M4(-1.837e-01, -1.233e-01, -1.180e-02, -1.169e-02, -1.061e-01, -2.287e-02, -8.552e-03, -5.326e-02, 7.852e-02, 7.589e-03, 2.108e-02, -4.405e-03, 3.078e-02, -9.147e-02, -1.023e-01, -1.030e-02));
	r += mul(s6_7, M4(1.237e-01, -2.445e-01, -2.362e-01, -2.529e-02, -7.137e-02, -5.691e-04, -7.231e-02, -1.206e-01, -5.707e-02, 8.429e-02, -2.818e-02, -5.455e-03, -1.368e-01, 1.412e-01, 1.685e-01, 1.579e-01));
	r += mul(s6_8, M4(8.906e-02, -7.479e-03, -4.303e-02, 4.799e-02, -8.465e-02, 7.594e-02, -1.030e-02, -1.335e-01, -8.414e-03, -4.142e-02, -7.459e-03, 3.904e-02, -1.490e-01, -8.325e-04, 6.042e-02, 1.324e-01));
	r += mul(s7_0, M4(4.533e-02, -5.916e-02, -9.127e-02, -4.446e-02, 8.467e-02, -1.642e-02, 9.352e-02, 3.970e-03, -1.636e-01, -1.423e-01, 4.556e-03, -1.125e-01, -4.726e-02, 8.311e-02, 5.499e-02, 3.963e-02));
	r += mul(s7_1, M4(4.547e-02, 3.472e-03, -8.499e-02, 6.881e-02, 8.495e-02, -8.364e-02, -4.263e-02, 1.058e-01, -9.016e-02, -7.198e-02, -1.056e-01, -6.057e-03, -4.490e-02, 1.829e-02, 4.157e-02, -8.081e-02));
	r += mul(s7_2, M4(-8.375e-02, -1.000e-02, -1.144e-02, 5.829e-02, 5.197e-02, -1.701e-01, -2.617e-02, -3.956e-02, 2.774e-03, -6.882e-02, -1.381e-01, -2.288e-01, -6.002e-02, 3.220e-02, -1.218e-03, 6.675e-02));
	r += mul(s7_3, M4(5.483e-02, -1.547e-02, -6.716e-03, -1.510e-01, 7.996e-02, -1.777e-02, 4.348e-02, 8.002e-02, 1.035e-01, -9.894e-03, -2.000e-02, 1.043e-01, -9.879e-02, -1.135e-01, -2.837e-02, 1.051e-01));
	r += mul(s7_4, M4(-2.424e-01, 2.243e-01, -3.853e-02, 2.895e-01, 2.039e-01, -1.486e-01, -3.057e-02, -6.787e-02, 1.795e-02, -1.118e-01, -2.165e-01, -4.785e-02, 5.191e-02, 1.092e-01, -1.205e-01, 1.643e-01));
	r += mul(s7_5, M4(2.205e-01, -2.171e-02, 4.996e-02, 9.916e-02, 5.749e-02, 3.081e-02, 8.032e-02, 1.415e-01, -2.332e-01, 1.230e-02, -1.489e-02, 2.408e-02, 4.951e-02, -7.304e-02, 3.857e-02, 3.996e-03));
	r += mul(s7_6, M4(-6.612e-02, 1.535e-02, -8.325e-02, 9.137e-02, 2.110e-02, 2.828e-02, -3.476e-02, -3.593e-02, 6.483e-03, -6.664e-02, 1.464e-01, 3.244e-03, -4.637e-02, -3.400e-02, 8.721e-03, -2.291e-02));
	r += mul(s7_7, M4(1.081e-01, -8.187e-03, -5.687e-02, 5.623e-02, 9.695e-02, 9.866e-03, -3.664e-02, -7.675e-03, 2.581e-02, 7.745e-02, 5.536e-02, -7.075e-02, -1.414e-01, -5.800e-02, -1.569e-02, 4.333e-02));
	r += mul(s7_8, M4(6.782e-02, 2.670e-02, -1.368e-02, -1.088e-01, -6.193e-02, -5.791e-02, 8.965e-02, -6.801e-03, 2.512e-02, -3.296e-02, 1.083e-01, 3.109e-02, 2.950e-02, 2.437e-02, -2.642e-02, 8.794e-03));
	// Constant per-component offset, added after all weighted terms.
	r += V4(-1.256e-03, -1.254e-02, 2.979e-02, 1.394e-02);
	return r;
}

// f2: returns one V4 (min16float4) built from 72 V4 inputs.
// It has the same signature and statement layout as the neighbouring f1 and
// differs only in its constant weights and offset: each argument sN_K
// (N = 0..7, K = 0..8) is multiplied as a row vector by its own constant M4
// (min16float4x4) matrix via mul(), the products are summed into r in
// argument order, a constant V4 offset is added last, and the raw sum is
// returned (no activation or clamping is applied inside this function).
// NOTE(review): presumably N selects the source texture / channel group and
// K the tap position inside a 3x3 window -- confirm against the call site.
// The literals look machine-generated; do not edit or reorder them by hand,
// since the min16float accumulation order affects rounding.
V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) {
	// Accumulator; the scalar 0.0 initializes all four components.
	V4 r = 0.0;
	// Weighted contributions of s0_0 .. s7_8, one 4x4 weight matrix per input.
	r += mul(s0_0, M4(3.887e-02, 3.025e-02, 1.517e-03, -3.288e-02, -1.083e-01, -4.845e-02, 2.170e-02, 4.413e-02, 2.451e-02, -6.292e-02, -2.513e-02, 5.728e-02, 3.625e-02, 7.753e-02, 7.297e-02, -5.741e-02));
	r += mul(s0_1, M4(-1.728e-02, -5.232e-02, -8.858e-02, -2.687e-02, -1.604e-02, -6.041e-02, -1.106e-01, 7.307e-02, -3.834e-02, 1.996e-02, -1.364e-02, -1.310e-02, -1.076e-01, -1.056e-01, -1.804e-02, -1.684e-02));
	r += mul(s0_2, M4(1.105e-01, -2.900e-02, 5.276e-02, -5.441e-03, -7.289e-02, 1.312e-01, 1.417e-01, -7.838e-03, -9.457e-02, 3.891e-04, 6.369e-02, -1.775e-02, 9.250e-02, 1.866e-02, -9.643e-02, 4.267e-02));
	r += mul(s0_3, M4(-1.042e-01, 4.142e-02, -1.478e-01, -4.082e-02, -2.004e-01, -4.275e-02, 5.319e-02, 3.056e-02, -9.493e-03, 9.243e-03, -8.440e-02, -1.425e-02, -2.520e-02, -5.825e-02, 5.914e-02, -2.031e-02));
	r += mul(s0_4, M4(-1.251e-01, -1.828e-01, -3.615e-02, 9.177e-02, -2.178e-01, 1.441e-01, -1.189e-01, -4.995e-02, -6.180e-02, 2.246e-02, 2.158e-02, 2.159e-03, -1.302e-01, -7.711e-02, -1.093e-01, -1.144e-02));
	r += mul(s0_5, M4(1.565e-02, 2.006e-02, -2.840e-02, 4.644e-02, -1.280e-01, 7.514e-02, 6.596e-02, -1.821e-01, 3.658e-02, 8.469e-03, 5.564e-02, -5.136e-02, 2.397e-02, -1.296e-02, -4.520e-02, -1.471e-02));
	r += mul(s0_6, M4(-3.172e-02, 4.281e-02, 5.391e-04, -7.864e-02, -1.542e-02, 1.393e-02, 8.824e-02, -5.775e-02, -1.202e-02, 6.912e-02, 1.232e-02, 7.675e-03, -4.605e-03, 7.013e-03, -1.183e-02, -1.676e-02));
	r += mul(s0_7, M4(-1.415e-01, -5.271e-02, -5.525e-02, 6.507e-02, -3.757e-02, 3.016e-02, -1.386e-02, -4.343e-02, 1.544e-03, 1.466e-02, -1.114e-01, 7.776e-02, -2.544e-03, 1.628e-02, 1.985e-02, 5.872e-02));
	r += mul(s0_8, M4(3.167e-02, -7.367e-02, -4.462e-02, 3.674e-02, 5.715e-02, -3.717e-02, -5.898e-02, 2.154e-02, 1.026e-01, -8.293e-03, -4.104e-02, 9.261e-04, 2.202e-02, 2.020e-02, -7.776e-03, -8.210e-03));
	r += mul(s1_0, M4(-2.467e-01, -1.944e-02, 8.368e-02, 7.498e-02, 7.499e-02, 2.208e-03, 9.049e-02, -8.912e-02, -8.240e-02, 6.174e-02, 3.425e-02, -4.272e-02, -1.418e-01, 2.797e-01, 2.659e-01, -5.863e-02));
	r += mul(s1_1, M4(6.391e-02, -6.175e-02, -1.784e-02, -4.688e-03, 1.535e-01, -3.568e-02, 3.525e-02, 2.541e-02, -1.344e-01, 3.259e-01, -3.196e-02, -6.148e-02, -3.933e-02, 1.929e-01, -1.274e-02, -2.005e-01));
	r += mul(s1_2, M4(-7.743e-02, 1.428e-01, 1.349e-01, -5.239e-02, 1.095e-01, 4.970e-02, -1.311e-02, -1.640e-03, 1.850e-02, 1.172e-01, -2.602e-02, -4.640e-02, -1.370e-01, 8.976e-02, -8.676e-02, -7.201e-02));
	r += mul(s1_3, M4(-1.673e-01, 1.323e-01, 1.950e-02, -1.240e-02, 8.475e-02, 2.113e-02, 8.506e-02, -5.274e-02, 1.806e-01, 1.457e-01, 2.915e-01, -1.440e-01, -1.149e-01, -1.061e-01, -2.980e-02, 6.842e-02));
	r += mul(s1_4, M4(-2.799e-01, 2.112e-02, -2.612e-01, -1.700e-01, 3.217e-02, 1.276e-01, -9.203e-02, 2.761e-02, -2.181e-01, 5.061e-01, 3.991e-01, -1.106e-01, -2.741e-02, -2.481e-01, 2.170e-01, -2.852e-01));
	r += mul(s1_5, M4(3.487e-01, 7.214e-02, 4.494e-02, 5.371e-02, 8.193e-02, 9.480e-02, -1.073e-01, 2.985e-02, 1.997e-02, 7.729e-02, 9.772e-02, -7.332e-02, -2.498e-01, 6.762e-02, 2.293e-02, -2.888e-01));
	r += mul(s1_6, M4(4.480e-02, 1.056e-01, 1.198e-01, -1.012e-01, 1.828e-01, -7.560e-02, 3.066e-02, -6.429e-02, 8.751e-02, 3.170e-02, 7.807e-02, -3.122e-02, 9.162e-02, -1.106e-02, -1.311e-02, 9.599e-02));
	r += mul(s1_7, M4(3.818e-02, -1.433e-01, 3.782e-01, -2.188e-02, 3.919e-02, -8.324e-02, -3.761e-02, 1.588e-02, -1.014e-01, 1.850e-01, 2.286e-02, 3.918e-02, 1.407e-01, -6.814e-02, 1.631e-01, 1.562e-02));
	r += mul(s1_8, M4(-4.358e-04, -9.482e-02, 2.193e-01, -2.293e-01, 2.554e-01, -9.018e-02, -8.484e-02, 5.927e-02, 7.714e-02, 1.900e-01, 3.396e-02, 1.056e-02, -7.022e-02, 1.926e-02, 4.448e-02, 4.663e-02));
	r += mul(s2_0, M4(1.784e-02, -7.199e-02, -5.824e-02, 6.873e-02, -3.191e-04, -9.134e-02, 8.370e-02, -3.068e-02, -5.419e-02, -6.458e-02, 6.291e-03, 3.031e-02, -4.128e-02, 5.380e-02, 4.977e-02, -7.324e-03));
	r += mul(s2_1, M4(1.181e-02, -1.516e-01, -9.786e-02, 7.405e-02, 1.638e-01, -4.243e-02, 1.167e-01, 5.082e-02, 1.001e-01, 1.146e-01, -1.138e-01, 3.022e-02, -4.277e-02, 1.614e-02, 9.942e-02, -7.122e-03));
	r += mul(s2_2, M4(1.158e-01, -1.022e-01, -2.080e-02, -7.057e-02, 5.919e-02, -1.602e-01, -5.761e-02, 6.987e-02, 1.531e-01, -6.507e-02, -8.643e-02, 9.661e-02, -4.889e-02, -4.945e-03, -1.143e-02, -2.502e-02));
	r += mul(s2_3, M4(-1.857e-01, 1.340e-01, -2.733e-01, 1.031e-01, -4.808e-02, 1.324e-01, 4.173e-02, 3.009e-02, -1.060e-01, -2.040e-02, 9.355e-03, 6.253e-02, 5.537e-02, 8.865e-02, 1.822e-01, -1.006e-01));
	r += mul(s2_4, M4(-9.784e-02, 2.023e-02, -1.142e-01, -2.146e-01, -3.103e-02, 1.708e-02, 1.423e-01, 3.270e-02, -1.197e-01, 2.456e-01, -1.454e-01, 3.783e-01, 1.439e-02, -4.989e-03, 8.481e-02, 5.491e-03));
	r += mul(s2_5, M4(1.557e-01, -1.260e-02, 9.447e-02, 7.943e-02, 2.710e-02, 5.271e-02, 2.650e-02, 7.770e-02, 7.340e-02, -5.755e-02, -3.267e-03, 8.076e-02, -7.777e-02, 6.201e-02, 2.384e-02, -6.711e-03));
	r += mul(s2_6, M4(-1.337e-01, 2.017e-01, 1.967e-01, 8.228e-02, -1.399e-03, -1.107e-01, 1.128e-02, 8.017e-02, -2.114e-02, 2.236e-02, 3.746e-02, 1.154e-01, 7.151e-03, -4.103e-02, 2.060e-01, 3.782e-02));
	r += mul(s2_7, M4(1.582e-01, 4.512e-02, 1.201e-01, 3.814e-02, -8.135e-02, -5.291e-03, -5.477e-03, 9.017e-02, 1.552e-01, -3.701e-02, -7.238e-02, 1.975e-01, 4.302e-02, -3.722e-02, -7.104e-02, 1.231e-01));
	r += mul(s2_8, M4(-1.981e-02, 1.215e-01, -6.274e-02, 5.424e-02, 9.411e-02, 6.553e-02, -7.473e-02, 1.218e-01, 4.111e-03, 1.296e-02, -1.339e-02, 6.284e-02, -3.329e-02, 4.491e-03, -4.000e-02, 2.687e-02));
	r += mul(s3_0, M4(1.900e-02, 3.132e-02, -1.107e-02, 2.916e-02, -2.324e-02, -3.320e-02, -7.677e-02, -3.622e-03, -1.094e-01, 2.387e-02, 8.828e-03, 4.683e-03, 2.363e-01, 6.495e-02, 6.438e-02, -4.063e-03));
	r += mul(s3_1, M4(1.306e-02, -7.558e-03, 8.329e-04, 8.291e-02, 1.002e-01, 6.220e-02, 3.872e-02, 2.616e-02, -6.577e-02, 1.472e-01, -1.272e-01, 1.232e-02, 1.216e-01, 3.466e-02, -5.328e-02, 3.051e-02));
	r += mul(s3_2, M4(2.935e-02, -8.973e-02, 6.832e-02, -2.761e-02, 3.865e-02, 2.328e-02, -7.397e-02, -1.242e-02, 2.967e-02, 1.057e-01, -1.049e-01, 1.078e-01, -6.954e-02, 1.442e-02, 1.111e-01, -4.695e-02));
	r += mul(s3_3, M4(2.117e-02, 6.500e-02, -7.845e-02, 8.502e-04, -6.699e-02, 3.160e-02, -8.446e-02, -3.742e-02, -1.103e-01, 4.940e-02, 1.327e-01, 6.795e-02, 2.514e-01, 1.525e-02, 1.133e-01, -7.580e-02));
	r += mul(s3_4, M4(-1.365e-01, -1.377e-02, 2.066e-02, -3.622e-02, 1.422e-01, -1.854e-01, 1.101e-01, 9.160e-02, -2.666e-01, 2.378e-01, -2.834e-01, 2.074e-01, 6.809e-01, -6.729e-02, 1.868e-01, 2.246e-01));
	r += mul(s3_5, M4(7.356e-02, -9.508e-02, -1.172e-04, 5.422e-02, -1.893e-02, 6.854e-02, -1.892e-02, 6.860e-03, -2.202e-02, -9.934e-02, 2.023e-02, 1.135e-01, 8.935e-02, 1.218e-01, -6.803e-02, 5.408e-02));
	r += mul(s3_6, M4(1.426e-01, -9.283e-02, 2.520e-02, -5.103e-02, 1.056e-01, -1.709e-02, 9.450e-02, -2.310e-02, -1.719e-02, 6.322e-02, 2.448e-02, 8.671e-02, 9.691e-02, -1.051e-01, -2.729e-02, 2.363e-02));
	r += mul(s3_7, M4(-9.781e-02, -1.276e-01, -1.571e-01, 1.159e-02, -4.129e-03, -1.075e-01, 1.402e-01, -1.880e-02, 1.100e-01, -4.011e-02, -1.048e-02, 1.545e-01, 3.196e-01, -1.222e-01, 3.091e-02, 5.484e-02));
	r += mul(s3_8, M4(-2.461e-02, -1.011e-01, -2.695e-02, 2.886e-02, 4.451e-02, -5.603e-02, -7.350e-02, 1.154e-02, 6.965e-02, 1.972e-02, -5.793e-03, 5.401e-02, 6.273e-02, -1.393e-01, 6.810e-02, -1.290e-02));
	r += mul(s4_0, M4(-7.404e-02, 5.460e-02, -4.076e-02, -3.550e-02, 6.267e-02, 3.118e-02, 3.598e-02, -1.232e-02, 7.524e-02, -1.605e-02, -8.208e-02, 2.116e-02, 1.133e-01, -4.855e-02, -1.767e-02, 6.122e-03));
	r += mul(s4_1, M4(-2.916e-02, -3.449e-02, -1.089e-02, 8.319e-03, -4.846e-02, -1.997e-01, 2.053e-03, 5.580e-02, -6.975e-02, -8.495e-02, -4.238e-03, 4.718e-02, 3.513e-02, 6.665e-02, 6.092e-02, 5.555e-02));
	r += mul(s4_2, M4(1.128e-01, 4.959e-02, -3.970e-02, 2.896e-03, 1.092e-02, 2.103e-02, -3.434e-03, -3.586e-02, 1.436e-02, 4.328e-02, -4.943e-03, 2.167e-02, -1.454e-02, -1.050e-02, 3.731e-03, -1.429e-02));
	r += mul(s4_3, M4(5.178e-02, 2.050e-02, -1.067e-01, -1.003e-02, 1.600e-01, 1.565e-01, -1.674e-01, 1.675e-02, 1.097e-01, -8.281e-03, -4.795e-02, -6.924e-03, 1.339e-01, -7.568e-02, -8.288e-02, 9.900e-02));
	r += mul(s4_4, M4(-4.674e-02, -2.970e-02, -2.418e-01, -4.915e-04, 3.764e-03, -1.025e-01, -1.236e-01, -2.139e-01, -6.857e-02, 5.994e-03, -1.035e-01, -7.110e-02, -1.443e-01, -6.694e-02, 1.153e-01, -3.853e-02));
	r += mul(s4_5, M4(7.134e-02, 1.370e-02, -2.476e-02, 7.440e-02, 3.270e-02, -2.947e-02, 1.667e-02, 8.067e-03, 2.572e-02, 1.023e-01, -7.215e-02, -3.183e-03, -7.987e-02, -4.945e-02, -1.032e-04, 4.923e-02));
	r += mul(s4_6, M4(2.903e-02, -5.938e-03, -4.288e-02, -2.255e-02, 2.899e-02, 6.334e-03, 2.114e-02, 3.533e-02, 8.110e-03, -9.280e-02, -1.825e-02, -4.875e-02, 2.026e-01, -7.598e-02, -3.330e-03, -6.580e-02));
	r += mul(s4_7, M4(-1.462e-01, 3.489e-02, 3.617e-02, -1.989e-02, 5.885e-02, -2.127e-04, -1.736e-02, 8.010e-03, -7.144e-03, -6.051e-02, -9.249e-02, -8.694e-02, -6.210e-02, -1.074e-01, 2.473e-01, -5.482e-02));
	r += mul(s4_8, M4(1.274e-01, 4.183e-02, 2.901e-02, 3.209e-03, -8.022e-02, 1.969e-02, 2.072e-02, -2.608e-02, 5.421e-02, -1.053e-01, 2.146e-02, -5.179e-02, 7.973e-02, -5.988e-02, 1.210e-01, -4.631e-03));
	r += mul(s5_0, M4(2.636e-02, 8.165e-03, 1.182e-01, -6.995e-02, -6.976e-02, 2.215e-03, -5.314e-02, 8.933e-02, -4.300e-02, 2.515e-02, -8.037e-02, 6.770e-03, -9.013e-02, -1.090e-02, -1.293e-02, 7.325e-02));
	r += mul(s5_1, M4(2.783e-02, 1.057e-01, 1.285e-01, -4.333e-02, -2.084e-02, -1.239e-02, 1.368e-02, 1.511e-02, -1.975e-02, 9.784e-02, 7.017e-02, -2.711e-02, 5.978e-02, -2.261e-02, 2.617e-02, 2.476e-02));
	r += mul(s5_2, M4(5.491e-02, 9.302e-02, -6.994e-02, -3.273e-02, -7.603e-02, 5.919e-02, -2.470e-02, 5.787e-02, -1.126e-01, 1.712e-01, 9.627e-03, -2.791e-02, -6.110e-02, -5.188e-02, 3.332e-03, 6.078e-02));
	r += mul(s5_3, M4(2.370e-01, -4.704e-02, 2.878e-01, 1.572e-02, 8.702e-02, 2.085e-01, -1.493e-01, 4.833e-03, 2.878e-03, 9.338e-02, -9.101e-02, -4.112e-02, 1.460e-01, 3.081e-02, -1.515e-01, 8.386e-02));
	r += mul(s5_4, M4(7.837e-02, 8.659e-03, 1.307e-01, -1.490e-01, -1.352e-01, 1.181e-01, -7.400e-03, 1.975e-02, 1.048e-02, 7.512e-02, -7.127e-02, -3.951e-02, -1.431e-01, -1.643e-01, 1.383e-01, 7.349e-02));
	r += mul(s5_5, M4(-6.518e-02, -7.774e-03, 4.944e-02, -1.617e-02, -6.328e-02, -7.094e-02, 4.030e-02, 6.984e-02, 9.349e-02, -2.611e-02, -4.126e-02, -4.661e-02, -9.324e-02, -5.326e-02, -3.313e-02, 1.342e-01));
	r += mul(s5_6, M4(4.095e-02, 6.703e-02, 1.374e-01, 5.021e-02, -8.304e-02, 7.704e-02, 3.923e-02, 7.668e-02, -1.441e-01, 6.898e-02, 8.456e-04, -3.362e-02, -5.124e-02, -5.141e-02, -1.341e-01, 7.546e-02));
	r += mul(s5_7, M4(2.028e-01, -6.965e-02, 1.672e-01, 8.417e-02, 1.001e-01, -1.704e-02, -3.863e-02, 7.112e-02, -1.513e-02, 2.499e-02, 1.131e-01, -2.397e-02, -1.813e-01, -5.494e-02, -9.805e-04, -9.176e-02));
	r += mul(s5_8, M4(1.635e-01, 6.301e-02, 3.947e-02, -2.926e-02, -1.375e-01, 4.817e-02, -2.079e-02, 4.112e-02, 2.780e-02, 2.360e-03, 7.991e-03, 2.806e-02, -2.174e-02, -1.659e-01, 6.571e-02, 2.288e-02));
	r += mul(s6_0, M4(1.174e-02, 3.992e-02, -2.902e-02, -1.564e-02, -9.947e-03, 1.022e-01, -4.538e-02, 3.136e-02, -3.908e-02, 2.333e-02, -1.226e-02, -1.850e-02, 7.776e-02, 4.887e-02, -3.093e-02, -3.977e-02));
	r += mul(s6_1, M4(-5.709e-02, 2.241e-01, 7.495e-02, -8.553e-02, -1.715e-02, 7.678e-02, -8.276e-02, -1.398e-01, 6.393e-02, 2.037e-02, -3.649e-02, -4.055e-02, 1.133e-01, 1.523e-01, -2.925e-02, -1.990e-02));
	r += mul(s6_2, M4(-4.584e-02, 5.224e-02, -1.234e-01, -6.822e-02, 7.514e-02, -3.682e-02, -1.897e-02, -6.379e-02, 4.519e-02, 2.138e-02, -4.252e-02, 5.279e-02, 1.038e-01, 1.302e-01, -1.700e-02, 1.614e-05));
	r += mul(s6_3, M4(-1.100e-01, 6.973e-02, -5.033e-02, -4.205e-02, 1.030e-01, 9.182e-02, -5.071e-02, -1.013e-01, 9.751e-02, 4.474e-03, 3.944e-02, -2.502e-02, 4.318e-02, 3.176e-02, 7.114e-02, -9.254e-02));
	r += mul(s6_4, M4(-1.743e-02, -1.281e-01, 1.648e-01, -1.096e-01, -1.154e-01, -1.067e-01, -1.176e-01, -2.978e-02, -1.082e-01, -1.007e-01, 1.235e-01, -2.123e-01, -3.999e-02, 8.520e-02, 1.106e-01, -1.162e-01));
	r += mul(s6_5, M4(5.794e-02, -1.919e-01, 1.467e-01, -1.568e-01, -2.875e-02, -9.323e-02, 7.649e-02, -1.092e-01, 6.633e-02, 6.140e-02, 4.574e-02, 4.918e-02, 1.785e-01, 1.223e-01, 4.278e-03, -1.782e-01));
	r += mul(s6_6, M4(-7.013e-02, -4.507e-02, -7.393e-02, 2.513e-03, 1.189e-01, 1.257e-02, -4.197e-02, -3.805e-02, 7.862e-03, 7.478e-02, -1.991e-02, 1.965e-02, 1.314e-01, -1.430e-02, 2.908e-02, -6.047e-02));
	r += mul(s6_7, M4(-3.217e-02, 4.262e-02, -1.983e-01, 4.035e-02, -1.659e-02, -9.437e-02, -4.622e-02, -6.658e-02, -8.081e-02, 3.849e-02, 1.314e-01, -5.261e-02, 8.661e-02, -8.237e-03, 1.585e-01, -1.614e-01));
	r += mul(s6_8, M4(-5.737e-02, -3.302e-03, -6.742e-02, -9.583e-02, -3.381e-02, -2.261e-02, -1.857e-02, 2.350e-03, 1.091e-01, -6.425e-02, 1.871e-02, -6.960e-02, 1.333e-01, -2.407e-02, 2.956e-02, -2.104e-01));
	r += mul(s7_0, M4(-2.732e-02, -6.994e-02, -1.080e-01, 1.307e-02, 5.425e-03, 6.067e-02, 4.972e-03, 6.205e-02, 5.202e-02, 1.135e-02, -5.078e-02, -9.209e-02, -8.480e-02, -5.938e-02, -2.936e-02, 2.958e-02));
	r += mul(s7_1, M4(-9.352e-02, 4.578e-02, -1.038e-01, 4.931e-02, 2.834e-02, 2.557e-02, 1.260e-01, 8.063e-02, -4.916e-02, -2.708e-02, 1.820e-01, -6.008e-02, 5.311e-02, -2.096e-02, -6.459e-02, -2.527e-02));
	r += mul(s7_2, M4(-1.379e-02, -6.491e-02, -3.690e-02, 8.692e-03, 6.566e-02, -2.458e-02, 7.286e-02, 1.636e-03, 3.495e-02, 9.653e-02, 4.719e-02, 4.307e-04, 3.166e-02, -2.120e-02, 1.104e-03, 2.250e-02));
	r += mul(s7_3, M4(-2.417e-02, -3.347e-02, -5.044e-03, -2.641e-03, 7.385e-02, 5.451e-02, -3.506e-02, 3.922e-02, 1.964e-01, 7.353e-02, 5.081e-02, 5.929e-02, -1.945e-01, -9.979e-03, -2.426e-01, 5.309e-02));
	r += mul(s7_4, M4(1.228e-01, 3.044e-02, -4.675e-02, -6.772e-02, -1.994e-01, -7.171e-02, -1.114e-01, 2.572e-01, -2.172e-01, -1.079e-01, 9.681e-03, -6.967e-02, 1.219e-03, -4.686e-03, -7.765e-03, 8.815e-03));
	r += mul(s7_5, M4(-6.989e-02, -1.078e-01, 8.804e-02, -1.109e-01, 1.683e-01, -4.964e-02, -2.924e-02, 1.125e-02, 2.629e-01, 1.073e-01, -5.051e-02, 1.134e-01, 3.944e-02, -2.659e-02, -3.542e-02, 2.898e-02));
	r += mul(s7_6, M4(2.536e-02, -6.420e-02, -4.203e-02, -1.582e-02, 4.380e-02, -3.555e-02, -2.422e-02, -8.902e-03, -1.086e-01, 2.797e-02, -1.048e-01, 1.116e-01, -1.139e-02, 3.176e-02, 2.587e-02, 2.958e-02));
	r += mul(s7_7, M4(5.033e-02, 1.780e-02, -9.527e-02, 3.954e-02, -6.976e-02, -1.139e-02, 4.906e-02, 7.189e-02, -2.352e-02, 5.318e-02, 5.140e-02, 4.704e-02, -4.843e-02, -3.853e-02, -7.482e-02, -8.305e-02));
	r += mul(s7_8, M4(-1.397e-01, 2.179e-02, 2.338e-02, -1.016e-01, 1.103e-01, -6.863e-02, 1.780e-02, 2.561e-02, 1.304e-01, -1.720e-02, 2.107e-02, 4.751e-03, 4.767e-02, -1.159e-01, -1.885e-01, 1.317e-02));
	// Constant per-component offset, added after all weighted terms.
	r += V4(3.299e-02, 3.575e-02, 4.653e-02, -5.926e-02);
	return r;
}

// Computes one V4 output value from 72 V4 inputs (8 groups s0..s7 with 9 entries each).
// Every input is used as the vector operand of mul() against its own constant 4x4 matrix,
// the products are summed in the order written below, and a constant bias is added last.
// Pass6 stores the returned value in t7.
// The matrices and the bias are generated constants; keep the statement order so the
// reduced-precision accumulation order stays the same.
V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) {
	V4 acc = 0.0;
	// Input group s0
	acc += mul(s0_0, M4(-5.885e-03, -2.546e-02, 2.558e-02, -2.541e-02, 3.475e-02, 1.645e-01, -4.967e-02, 5.703e-02, -8.175e-03, 1.006e-02, -9.734e-03, 1.796e-02, 1.192e-01, -6.662e-02, 1.989e-02, 6.194e-02));
	acc += mul(s0_1, M4(-1.198e-01, 3.162e-02, -3.272e-02, -3.826e-02, -3.700e-01, 1.482e-01, -2.114e-01, -1.351e-01, -8.899e-02, -4.971e-02, 1.209e-01, -8.257e-02, 1.265e-01, -1.939e-02, -6.032e-03, 1.074e-01));
	acc += mul(s0_2, M4(1.562e-02, -8.563e-02, -6.263e-03, -9.005e-02, -1.248e-01, 1.228e-01, -6.548e-03, -2.062e-01, 8.436e-02, 6.663e-02, 9.394e-02, -9.600e-02, -6.184e-02, 4.953e-02, -1.026e-01, 3.131e-02));
	acc += mul(s0_3, M4(-6.295e-02, -4.597e-02, 8.288e-02, 1.139e-01, -2.021e-01, 6.453e-02, 1.036e-02, -1.155e-01, 2.089e-02, -2.989e-02, -3.874e-02, 1.819e-02, -1.555e-02, -1.129e-01, 9.900e-03, -1.252e-01));
	acc += mul(s0_4, M4(9.694e-02, 5.782e-02, 1.188e-02, -9.054e-02, -3.874e-01, 2.309e-01, -1.367e-01, -1.582e-01, -2.347e-01, 6.123e-03, -7.074e-02, -4.260e-02, 8.489e-02, -1.239e-01, 1.993e-02, -3.858e-02));
	acc += mul(s0_5, M4(-4.685e-02, -1.415e-01, -1.177e-01, -4.488e-02, 1.536e-01, 2.300e-01, -7.466e-02, 9.373e-02, 1.940e-02, -6.754e-02, 8.615e-03, 4.944e-02, -5.749e-02, 1.053e-01, 3.029e-02, 7.247e-02));
	acc += mul(s0_6, M4(-9.967e-02, -5.030e-02, 2.466e-03, 1.887e-02, -5.499e-02, 1.128e-03, -2.234e-03, 6.063e-03, 4.992e-02, -3.842e-02, -6.264e-03, 6.056e-04, -2.896e-02, -1.566e-02, -7.347e-02, -3.167e-02));
	acc += mul(s0_7, M4(3.720e-03, -4.195e-02, -2.264e-02, 4.933e-02, 3.558e-02, 3.830e-02, -2.230e-02, 1.220e-02, 5.597e-02, -3.489e-02, -4.640e-02, 3.423e-03, -3.118e-02, -1.338e-02, -1.107e-01, -1.904e-02));
	acc += mul(s0_8, M4(8.768e-02, 9.464e-02, -7.227e-02, -4.277e-03, -7.150e-02, 8.855e-02, -1.373e-02, -2.813e-02, 9.033e-03, 1.266e-02, 2.571e-02, 2.239e-02, 7.557e-02, -2.955e-02, 3.206e-02, -3.293e-02));
	// Input group s1
	acc += mul(s1_0, M4(1.093e-01, 2.123e-01, 1.520e-01, -1.096e-02, 1.140e-01, -6.139e-04, 1.689e-01, 7.166e-02, 8.511e-02, -8.368e-02, -5.352e-02, -1.224e-01, 3.872e-01, -1.178e-01, 2.918e-01, 2.200e-01));
	acc += mul(s1_1, M4(1.231e-01, 1.746e-01, 4.732e-02, -5.149e-02, 1.214e-01, -1.181e-01, 7.278e-02, 7.755e-02, -5.278e-02, -1.474e-01, 1.502e-02, -3.327e-02, -3.196e-03, 4.036e-01, 7.350e-01, 8.395e-02));
	acc += mul(s1_2, M4(2.968e-02, 9.406e-02, -4.022e-02, -1.459e-01, -7.690e-02, -4.029e-03, -5.914e-02, -5.643e-02, -1.059e-01, -8.707e-03, 2.767e-02, -2.201e-01, -5.068e-02, 1.015e-01, 1.402e-01, 6.987e-03));
	acc += mul(s1_3, M4(1.979e-02, 2.252e-01, -2.014e-01, -2.294e-01, 1.291e-01, -3.770e-02, 4.641e-02, -7.505e-02, 2.448e-01, -2.456e-01, 6.304e-02, -3.550e-02, 9.021e-02, -7.235e-02, 2.433e-01, -3.196e-02));
	acc += mul(s1_4, M4(-1.443e-01, -4.827e-02, 5.127e-02, 1.965e-01, 1.573e-02, -8.680e-02, 9.137e-02, 6.669e-02, 3.524e-01, 3.636e-03, 1.260e-01, 2.576e-01, -1.165e-01, -3.268e-02, 2.371e-01, 1.671e-01));
	acc += mul(s1_5, M4(1.851e-01, 3.140e-01, -3.121e-01, 2.096e-01, -1.471e-01, -6.644e-03, -7.641e-02, -4.940e-03, -1.964e-02, 4.312e-02, 1.531e-01, 5.525e-02, -7.958e-02, -4.680e-04, -3.540e-02, -1.654e-01));
	acc += mul(s1_6, M4(1.756e-01, 8.053e-02, 8.184e-02, 1.962e-01, 6.900e-02, 2.487e-03, 2.113e-02, -4.635e-02, -1.309e-01, -1.730e-01, -1.307e-01, -1.163e-01, 1.018e-01, 3.996e-02, 9.020e-02, 6.544e-02));
	acc += mul(s1_7, M4(-7.530e-02, 2.220e-01, 1.192e-01, -6.576e-02, 3.215e-02, -2.875e-02, 3.263e-02, 8.846e-03, 4.031e-02, -1.396e-01, -1.058e-01, -3.059e-02, -2.242e-01, 1.991e-01, -1.833e-02, -1.613e-02));
	acc += mul(s1_8, M4(-2.439e-01, 9.792e-02, 6.240e-02, -9.079e-02, 2.016e-02, -3.977e-02, -5.885e-02, 2.288e-02, 2.199e-01, -9.182e-02, 4.601e-02, -3.255e-02, 1.085e-01, -1.052e-01, 3.403e-02, -8.799e-02));
	// Input group s2
	acc += mul(s2_0, M4(-8.172e-02, -5.813e-02, -1.148e-01, -5.606e-02, 1.254e-01, -1.118e-02, 8.617e-02, 3.101e-02, -6.399e-02, 2.935e-02, -3.022e-02, -9.815e-02, 1.263e-01, 8.716e-02, 1.488e-03, 2.892e-02));
	acc += mul(s2_1, M4(-2.403e-01, -6.756e-02, 3.861e-02, -1.272e-01, 1.012e-01, -9.013e-03, -3.556e-02, 4.456e-02, 3.567e-02, 8.643e-02, -6.859e-04, 7.789e-02, -9.175e-02, 8.167e-02, 9.014e-03, -3.291e-03));
	acc += mul(s2_2, M4(-2.589e-02, -8.047e-02, 1.765e-02, -5.666e-02, 2.589e-02, -6.236e-02, 5.595e-02, 1.409e-01, -9.791e-02, -3.648e-02, 9.036e-03, -3.095e-02, -7.711e-02, -9.589e-02, 7.740e-02, -3.167e-02));
	acc += mul(s2_3, M4(-4.075e-01, -2.607e-01, -1.351e-01, 8.469e-02, -4.617e-02, -6.523e-02, -6.272e-03, 1.454e-01, 7.918e-02, 4.549e-02, -1.289e-01, 4.559e-04, -1.578e-01, -3.195e-02, -1.310e-01, -1.988e-01));
	acc += mul(s2_4, M4(-1.001e-01, 1.142e-01, -2.295e-01, -1.917e-01, 2.870e-01, -2.153e-01, 1.699e-01, 8.478e-02, 7.359e-02, -2.038e-01, 1.080e-01, 9.298e-02, 1.395e-03, -6.084e-02, 8.516e-03, 5.921e-02));
	acc += mul(s2_5, M4(4.890e-02, -1.980e-01, 5.298e-02, -7.964e-02, -6.946e-02, -2.022e-01, -1.893e-03, -8.310e-02, -5.277e-02, -9.537e-03, 5.377e-02, 6.629e-02, 1.544e-02, 3.057e-03, -7.162e-02, -1.441e-01));
	acc += mul(s2_6, M4(7.994e-02, -2.992e-02, -1.136e-01, 2.080e-03, -5.637e-02, 3.382e-02, 8.393e-03, -1.294e-02, 2.123e-02, 6.833e-03, 3.525e-02, 3.002e-02, 7.027e-02, 7.827e-02, 1.082e-01, 1.245e-02));
	acc += mul(s2_7, M4(-2.703e-01, -1.811e-01, 1.082e-01, -1.525e-02, 1.525e-01, 9.581e-02, -4.152e-02, -1.611e-02, -1.248e-01, -1.078e-02, 1.731e-02, -4.998e-02, -1.774e-01, 5.634e-02, -1.451e-01, 6.042e-02));
	acc += mul(s2_8, M4(7.234e-02, -7.818e-02, 4.948e-04, 4.252e-02, 1.517e-01, -6.549e-02, -9.122e-02, -1.192e-02, 4.772e-02, 4.773e-02, -2.814e-02, -4.270e-02, 2.777e-02, 7.317e-02, -1.038e-01, 2.099e-03));
	// Input group s3
	acc += mul(s3_0, M4(-1.160e-03, -4.776e-02, 1.320e-01, -5.099e-02, -7.016e-02, -3.634e-02, 5.819e-03, -3.680e-02, 2.560e-02, 9.705e-03, -1.349e-01, 4.162e-02, 2.998e-01, 6.999e-02, -9.148e-02, 1.306e-01));
	acc += mul(s3_1, M4(8.671e-02, -2.513e-04, 2.481e-01, -3.360e-02, 1.810e-02, 1.426e-02, -1.753e-01, -7.422e-02, -4.182e-04, 2.963e-02, -4.225e-02, 7.439e-02, -5.775e-02, 1.039e-01, -8.999e-02, 1.931e-01));
	acc += mul(s3_2, M4(1.279e-01, 4.706e-02, 1.274e-01, 5.953e-02, -4.903e-02, -4.301e-02, -6.777e-02, 1.582e-02, -3.740e-02, -1.648e-03, -9.810e-05, 1.001e-01, 1.818e-01, -9.297e-02, 3.020e-04, -5.370e-02));
	acc += mul(s3_3, M4(-9.060e-02, 1.185e-02, -1.783e-01, -4.235e-02, -1.380e-01, -2.946e-03, 1.148e-01, 1.939e-02, 1.450e-01, 2.229e-02, -2.045e-01, 3.980e-02, 2.243e-01, -7.762e-02, -9.742e-04, -8.569e-02));
	acc += mul(s3_4, M4(-2.442e-02, 2.062e-01, -1.265e-01, 4.012e-02, 2.739e-01, 4.146e-03, 1.713e-01, 3.659e-02, 1.138e-01, -1.864e-01, -4.026e-02, 1.728e-01, -7.976e-02, 6.411e-02, 2.068e-02, 2.147e-01));
	acc += mul(s3_5, M4(-1.294e-01, 2.487e-02, 1.467e-01, 8.779e-02, -1.647e-01, 4.106e-02, 3.740e-02, 1.564e-02, -1.010e-03, 2.372e-02, 3.233e-02, 1.864e-01, -1.230e-01, -7.062e-02, 4.219e-02, -1.362e-01));
	acc += mul(s3_6, M4(1.222e-01, -5.516e-02, -8.402e-02, 1.478e-02, -4.594e-02, 2.981e-02, -3.904e-02, -8.650e-02, -1.487e-03, 1.269e-03, 4.843e-02, 2.894e-02, 5.032e-02, 6.047e-02, -1.473e-03, 3.292e-02));
	acc += mul(s3_7, M4(9.554e-02, 2.187e-02, -1.455e-01, -8.992e-02, 1.309e-03, -3.460e-02, 1.417e-01, -3.960e-02, -1.057e-01, -3.014e-02, 1.360e-01, -6.093e-02, 2.066e-01, 2.765e-01, -1.451e-02, 2.153e-01));
	acc += mul(s3_8, M4(6.974e-02, -8.067e-03, -1.227e-02, 2.595e-02, -8.348e-03, -6.840e-02, -1.754e-02, 3.177e-02, -3.672e-02, -3.918e-03, 9.301e-04, 6.000e-02, 4.009e-02, 1.286e-01, 1.005e-01, 4.752e-02));
	// Input group s4
	acc += mul(s4_0, M4(-6.639e-03, 1.205e-02, -9.161e-02, 6.168e-02, 1.450e-01, -8.361e-02, 2.304e-01, 8.059e-02, -3.226e-03, -8.089e-02, 1.486e-01, 3.589e-02, 1.554e-04, -2.149e-02, -1.098e-01, 6.157e-03));
	acc += mul(s4_1, M4(-7.550e-02, 2.784e-02, -1.346e-01, 3.855e-02, 1.488e-01, -8.323e-02, 1.272e-03, -1.955e-02, 7.526e-02, -4.654e-02, 1.138e-01, -2.886e-02, 1.533e-01, -5.429e-02, 1.491e-01, 1.911e-02));
	acc += mul(s4_2, M4(2.749e-02, 2.167e-03, -2.995e-02, 1.297e-01, -3.067e-02, -1.002e-01, -1.171e-01, 5.089e-02, 7.106e-02, 2.617e-02, 7.060e-02, -1.464e-02, 1.128e-01, 1.469e-01, 1.755e-01, -1.565e-02));
	acc += mul(s4_3, M4(-3.147e-02, -2.933e-02, 1.108e-01, -5.021e-03, 5.973e-02, -1.508e-01, 1.813e-01, 9.161e-02, -5.187e-02, 8.315e-02, -6.392e-02, 8.132e-02, -1.111e-01, 4.777e-02, 8.955e-02, -9.181e-02));
	acc += mul(s4_4, M4(7.240e-02, 5.859e-02, -9.687e-03, 9.683e-02, -2.997e-01, 5.311e-02, -7.869e-02, 1.245e-01, 4.405e-01, 2.145e-04, 1.532e-02, -1.421e-01, 1.712e-02, -3.623e-02, 2.816e-01, -9.044e-02));
	acc += mul(s4_5, M4(-1.773e-01, 8.718e-02, 1.580e-01, -6.844e-02, 1.122e-01, -3.360e-04, -5.619e-02, -8.491e-02, -1.605e-01, 1.087e-01, -6.330e-03, -5.711e-02, -1.025e-01, -2.467e-03, 6.342e-02, -3.439e-02));
	acc += mul(s4_6, M4(2.071e-02, -5.719e-02, 4.813e-03, 4.429e-02, 9.461e-02, -2.034e-02, 1.234e-01, 1.544e-02, -4.892e-02, 2.310e-02, 6.791e-02, 4.307e-02, -2.855e-02, -4.628e-04, 1.339e-02, -5.448e-02));
	acc += mul(s4_7, M4(3.531e-02, 7.981e-02, -2.148e-01, 2.049e-02, -1.053e-02, -3.160e-02, 1.023e-01, 2.071e-02, -8.098e-02, 9.493e-02, -5.892e-02, -1.295e-02, 1.871e-01, -4.189e-02, 9.112e-02, 8.300e-02));
	acc += mul(s4_8, M4(-1.699e-02, -3.385e-02, -3.076e-02, -2.967e-02, 1.298e-02, -1.494e-02, 7.397e-03, 4.782e-02, 1.178e-01, 5.643e-02, 2.861e-02, -9.439e-03, -8.228e-02, 1.107e-01, -4.137e-03, -3.320e-02));
	// Input group s5
	acc += mul(s5_0, M4(1.071e-01, 8.736e-02, -5.126e-02, -3.961e-02, 1.843e-01, -2.182e-01, 1.343e-01, 1.082e-01, -6.434e-02, -9.993e-02, -4.548e-02, -3.949e-02, 3.089e-02, -4.468e-02, -5.958e-03, -1.703e-02));
	acc += mul(s5_1, M4(5.276e-02, 2.210e-01, -1.552e-01, 1.921e-01, 2.015e-01, -9.600e-02, 4.278e-02, -1.323e-01, -6.578e-03, -1.043e-01, 5.091e-02, 4.558e-02, 4.814e-02, -2.042e-01, 1.910e-01, 3.570e-02));
	acc += mul(s5_2, M4(-7.656e-02, 1.000e-01, 6.224e-02, -2.822e-02, -6.178e-02, -1.274e-01, -8.665e-02, -3.369e-02, -7.111e-02, 3.011e-02, 3.098e-02, 1.618e-02, 7.771e-02, -2.006e-03, 1.463e-01, -4.340e-02));
	acc += mul(s5_3, M4(1.671e-02, -2.406e-02, 3.675e-02, -6.527e-02, 1.862e-02, -2.094e-01, 1.246e-01, -3.006e-02, -2.290e-02, 2.513e-02, -5.528e-02, -3.249e-02, 1.135e-01, -1.494e-01, -7.521e-02, -3.806e-02));
	acc += mul(s5_4, M4(2.826e-01, 1.471e-01, 2.434e-01, 6.211e-01, -1.110e-01, 5.876e-02, 2.357e-02, 1.291e-01, 1.158e-01, -6.958e-02, -4.165e-02, -1.199e-02, -1.858e-01, -2.900e-01, 2.511e-03, 2.313e-02));
	acc += mul(s5_5, M4(2.556e-02, 3.107e-01, 6.381e-02, 1.181e-01, 1.081e-01, -1.219e-02, -2.541e-02, -7.888e-02, -1.273e-01, -9.006e-02, -1.173e-02, -1.212e-02, -8.626e-03, 1.553e-02, 4.044e-02, -5.263e-02));
	acc += mul(s5_6, M4(7.118e-02, 1.130e-01, 9.119e-02, 8.911e-03, 1.276e-01, -6.608e-02, 5.927e-02, -4.754e-02, -1.168e-01, -1.445e-03, -4.022e-02, -3.406e-02, -3.202e-02, -1.565e-01, -1.082e-01, -6.669e-02));
	acc += mul(s5_7, M4(-1.636e-01, -4.448e-02, 2.813e-02, 1.441e-01, -1.376e-02, 2.730e-02, 8.901e-04, -2.648e-02, -1.212e-01, 8.716e-02, 3.431e-02, -7.011e-02, 6.266e-02, -6.659e-03, -5.885e-02, 1.105e-02));
	acc += mul(s5_8, M4(-1.766e-02, 1.021e-02, 3.733e-02, 7.508e-02, 2.418e-02, 2.041e-02, -1.344e-02, -1.642e-02, 6.738e-02, 1.122e-02, 4.157e-02, -2.922e-02, -1.743e-02, 9.020e-02, 1.103e-01, -7.592e-02));
	// Input group s6
	acc += mul(s6_0, M4(1.134e-01, -3.963e-02, 1.128e-01, 3.215e-02, 3.800e-02, 3.478e-02, -9.269e-02, 8.044e-02, -2.934e-01, 1.042e-02, -1.862e-01, -9.349e-02, 7.495e-02, 6.232e-02, 1.725e-01, 1.731e-02));
	acc += mul(s6_1, M4(1.909e-02, 4.707e-02, -1.050e-01, 2.469e-02, -3.667e-02, 1.034e-01, -5.543e-02, -5.119e-02, -2.366e-01, -1.433e-01, -1.732e-01, -2.134e-01, 5.333e-02, -6.928e-02, 7.900e-02, 8.672e-02));
	acc += mul(s6_2, M4(-7.238e-02, 1.367e-02, -1.049e-01, 1.265e-01, -3.939e-03, 2.224e-02, -8.017e-04, -2.103e-02, -1.096e-01, -1.776e-03, 1.391e-01, 1.414e-01, 1.216e-01, 9.376e-02, 5.524e-02, 6.758e-02));
	acc += mul(s6_3, M4(-1.067e-01, 1.998e-02, -2.571e-02, -5.100e-03, -1.376e-01, -5.383e-02, -1.193e-01, -7.877e-02, -2.767e-04, 2.886e-02, 2.653e-02, 1.714e-01, -1.229e-02, -7.802e-02, 3.130e-02, -2.958e-02));
	acc += mul(s6_4, M4(-7.202e-02, -6.781e-02, 7.058e-02, 1.504e-02, 1.208e-01, 9.888e-02, -4.686e-03, -1.753e-02, 1.937e-01, 7.496e-02, -1.258e-01, 1.294e-01, -1.517e-02, -1.711e-03, 2.826e-01, 2.925e-01));
	acc += mul(s6_5, M4(7.727e-02, -1.526e-01, 2.852e-01, 2.777e-01, -8.854e-02, 6.094e-02, -5.698e-03, -1.644e-01, -7.879e-02, 1.572e-02, 3.822e-02, 9.991e-02, -1.252e-01, 1.024e-01, 2.398e-01, 2.683e-01));
	acc += mul(s6_6, M4(-2.504e-03, -7.172e-02, -1.091e-01, 1.194e-02, 7.060e-02, -4.136e-03, 2.395e-03, 5.092e-02, -2.992e-02, -2.311e-03, -4.920e-02, -1.680e-01, 4.731e-02, 3.985e-02, 1.189e-01, 8.127e-02));
	acc += mul(s6_7, M4(7.482e-02, -5.409e-02, -7.627e-02, -3.000e-02, 4.041e-02, 9.987e-02, -1.435e-02, 1.451e-01, -2.708e-02, -7.973e-02, 1.030e-01, 8.814e-02, 3.109e-03, -7.251e-02, 1.606e-01, 3.079e-02));
	acc += mul(s6_8, M4(-7.090e-02, -1.135e-01, 8.462e-02, -6.155e-02, -3.090e-02, 5.601e-02, 3.941e-02, 4.744e-03, 2.282e-02, 4.505e-02, 1.192e-02, 6.843e-02, 1.088e-01, -1.489e-01, 1.263e-01, 5.793e-02));
	// Input group s7
	acc += mul(s7_0, M4(2.178e-02, 1.912e-02, 4.309e-02, 1.703e-02, 3.737e-02, -7.983e-03, 2.782e-02, 1.850e-02, 1.452e-01, -1.497e-01, 4.961e-02, 5.188e-02, 1.984e-02, -2.894e-02, 1.030e-01, -3.062e-04));
	acc += mul(s7_1, M4(-2.136e-01, -4.810e-02, -9.977e-02, 1.103e-01, -3.734e-02, -1.038e-01, -3.264e-02, 1.932e-02, 3.639e-01, -6.706e-02, 4.201e-01, 1.576e-02, -4.993e-02, -1.537e-02, -5.964e-02, 4.097e-02));
	acc += mul(s7_2, M4(2.272e-02, -2.516e-02, -1.510e-02, 5.791e-02, -7.611e-02, -2.916e-02, 1.016e-01, 1.075e-01, 1.209e-02, 7.300e-02, 6.268e-02, 9.154e-02, 8.169e-02, -7.432e-02, -2.736e-02, -2.039e-03));
	acc += mul(s7_3, M4(-5.458e-02, -3.447e-02, -3.795e-03, -5.079e-02, -6.263e-02, -1.703e-01, 2.396e-02, -2.922e-02, 2.510e-01, 2.071e-02, -6.895e-02, 8.368e-02, -1.364e-01, 4.028e-02, -7.619e-02, -6.385e-02));
	acc += mul(s7_4, M4(-1.184e-01, -7.357e-02, 2.109e-01, 9.892e-02, 1.238e-01, 3.710e-03, -6.457e-02, -9.826e-02, -3.336e-01, 2.071e-02, -2.630e-02, -4.478e-02, 6.411e-02, 4.174e-03, 9.080e-02, 4.497e-02));
	acc += mul(s7_5, M4(1.538e-01, 7.003e-02, 6.026e-02, 3.331e-02, -1.284e-01, -8.834e-02, 1.376e-01, 8.211e-02, -1.947e-01, -1.464e-01, 3.603e-02, -8.266e-02, -1.739e-02, -6.261e-02, -1.003e-01, 4.533e-02));
	acc += mul(s7_6, M4(-1.178e-01, -5.722e-02, 2.677e-02, -4.211e-03, -1.071e-01, -8.749e-02, 1.005e-01, 7.650e-02, -1.112e-01, 6.735e-02, -8.316e-02, -9.940e-02, 1.052e-01, -9.073e-02, -3.761e-02, 4.248e-02));
	acc += mul(s7_7, M4(5.572e-02, -3.820e-02, -1.385e-01, 3.057e-02, 5.144e-02, -1.668e-02, 1.240e-01, 1.815e-01, -5.345e-02, 1.495e-01, -8.913e-02, 9.643e-02, -2.567e-03, -6.845e-02, 1.134e-01, 8.737e-02));
	acc += mul(s7_8, M4(-8.663e-04, -8.466e-04, 6.296e-02, 7.255e-02, 9.676e-02, -7.858e-02, 1.671e-01, 8.251e-02, 2.800e-01, 1.228e-01, -1.019e-01, -5.512e-02, -4.996e-02, 5.146e-02, -9.201e-02, -2.324e-02));
	// Constant bias, added after all products.
	acc += V4(1.173e-02, 1.947e-02, 2.270e-02, 7.192e-02);
	return acc;
}

// Pass 6 entry point. For the pixel `gxy` handled by this thread it samples l0..l3 at the
// nine offsets (-1,-1) .. (1,1), splits every sample into a non-negative part
// (s0/s2/s4/s6) and a non-positive part (s1/s3/s5/s7), and writes f0..f3 of those 72
// values to t4..t7.
// blockStart: origin of the block this thread group works on; added to the per-thread offset.
// tid:        thread id; only tid.x is used (presumably Rmp8x8 maps it to a 2D offset
//             inside the 8x8 block; confirm against the Magpie helper).
// l0..l3 are the sampling macros defined earlier in the file for this pass (not shown here).
void Pass6(uint2 blockStart, uint3 tid) {
	// `pt` and `pos` are referenced by name inside the O() sampling macro; do not rename them.
	float2 pt = float2(GetInputPt());
	uint2 gxy = Rmp8x8(tid.x) + blockStart;
	uint2 size = GetInputSize();
	// Threads that fall outside the input size have nothing to write.
	if (!(gxy.x < size.x && gxy.y < size.y)) {
		return;
	}
	float2 pos = (gxy + 0.5) * pt;

	// Scratch value holding the raw sample before it is split by sign.
	V4 tap;

	// l0 samples: s0_* = max(tap, 0), s1_* = -max(-tap, 0)
	tap = l0(-1.0, -1.0);
	V4 s0_0 = max(tap, 0.0);
	V4 s1_0 = -max(-tap, 0.0);
	tap = l0(0.0, -1.0);
	V4 s0_1 = max(tap, 0.0);
	V4 s1_1 = -max(-tap, 0.0);
	tap = l0(1.0, -1.0);
	V4 s0_2 = max(tap, 0.0);
	V4 s1_2 = -max(-tap, 0.0);
	tap = l0(-1.0, 0.0);
	V4 s0_3 = max(tap, 0.0);
	V4 s1_3 = -max(-tap, 0.0);
	tap = l0(0.0, 0.0);
	V4 s0_4 = max(tap, 0.0);
	V4 s1_4 = -max(-tap, 0.0);
	tap = l0(1.0, 0.0);
	V4 s0_5 = max(tap, 0.0);
	V4 s1_5 = -max(-tap, 0.0);
	tap = l0(-1.0, 1.0);
	V4 s0_6 = max(tap, 0.0);
	V4 s1_6 = -max(-tap, 0.0);
	tap = l0(0.0, 1.0);
	V4 s0_7 = max(tap, 0.0);
	V4 s1_7 = -max(-tap, 0.0);
	tap = l0(1.0, 1.0);
	V4 s0_8 = max(tap, 0.0);
	V4 s1_8 = -max(-tap, 0.0);

	// l1 samples: s2_* = max(tap, 0), s3_* = -max(-tap, 0)
	tap = l1(-1.0, -1.0);
	V4 s2_0 = max(tap, 0.0);
	V4 s3_0 = -max(-tap, 0.0);
	tap = l1(0.0, -1.0);
	V4 s2_1 = max(tap, 0.0);
	V4 s3_1 = -max(-tap, 0.0);
	tap = l1(1.0, -1.0);
	V4 s2_2 = max(tap, 0.0);
	V4 s3_2 = -max(-tap, 0.0);
	tap = l1(-1.0, 0.0);
	V4 s2_3 = max(tap, 0.0);
	V4 s3_3 = -max(-tap, 0.0);
	tap = l1(0.0, 0.0);
	V4 s2_4 = max(tap, 0.0);
	V4 s3_4 = -max(-tap, 0.0);
	tap = l1(1.0, 0.0);
	V4 s2_5 = max(tap, 0.0);
	V4 s3_5 = -max(-tap, 0.0);
	tap = l1(-1.0, 1.0);
	V4 s2_6 = max(tap, 0.0);
	V4 s3_6 = -max(-tap, 0.0);
	tap = l1(0.0, 1.0);
	V4 s2_7 = max(tap, 0.0);
	V4 s3_7 = -max(-tap, 0.0);
	tap = l1(1.0, 1.0);
	V4 s2_8 = max(tap, 0.0);
	V4 s3_8 = -max(-tap, 0.0);

	// l2 samples: s4_* = max(tap, 0), s5_* = -max(-tap, 0)
	tap = l2(-1.0, -1.0);
	V4 s4_0 = max(tap, 0.0);
	V4 s5_0 = -max(-tap, 0.0);
	tap = l2(0.0, -1.0);
	V4 s4_1 = max(tap, 0.0);
	V4 s5_1 = -max(-tap, 0.0);
	tap = l2(1.0, -1.0);
	V4 s4_2 = max(tap, 0.0);
	V4 s5_2 = -max(-tap, 0.0);
	tap = l2(-1.0, 0.0);
	V4 s4_3 = max(tap, 0.0);
	V4 s5_3 = -max(-tap, 0.0);
	tap = l2(0.0, 0.0);
	V4 s4_4 = max(tap, 0.0);
	V4 s5_4 = -max(-tap, 0.0);
	tap = l2(1.0, 0.0);
	V4 s4_5 = max(tap, 0.0);
	V4 s5_5 = -max(-tap, 0.0);
	tap = l2(-1.0, 1.0);
	V4 s4_6 = max(tap, 0.0);
	V4 s5_6 = -max(-tap, 0.0);
	tap = l2(0.0, 1.0);
	V4 s4_7 = max(tap, 0.0);
	V4 s5_7 = -max(-tap, 0.0);
	tap = l2(1.0, 1.0);
	V4 s4_8 = max(tap, 0.0);
	V4 s5_8 = -max(-tap, 0.0);

	// l3 samples: s6_* = max(tap, 0), s7_* = -max(-tap, 0)
	tap = l3(-1.0, -1.0);
	V4 s6_0 = max(tap, 0.0);
	V4 s7_0 = -max(-tap, 0.0);
	tap = l3(0.0, -1.0);
	V4 s6_1 = max(tap, 0.0);
	V4 s7_1 = -max(-tap, 0.0);
	tap = l3(1.0, -1.0);
	V4 s6_2 = max(tap, 0.0);
	V4 s7_2 = -max(-tap, 0.0);
	tap = l3(-1.0, 0.0);
	V4 s6_3 = max(tap, 0.0);
	V4 s7_3 = -max(-tap, 0.0);
	tap = l3(0.0, 0.0);
	V4 s6_4 = max(tap, 0.0);
	V4 s7_4 = -max(-tap, 0.0);
	tap = l3(1.0, 0.0);
	V4 s6_5 = max(tap, 0.0);
	V4 s7_5 = -max(-tap, 0.0);
	tap = l3(-1.0, 1.0);
	V4 s6_6 = max(tap, 0.0);
	V4 s7_6 = -max(-tap, 0.0);
	tap = l3(0.0, 1.0);
	V4 s6_7 = max(tap, 0.0);
	V4 s7_7 = -max(-tap, 0.0);
	tap = l3(1.0, 1.0);
	V4 s6_8 = max(tap, 0.0);
	V4 s7_8 = -max(-tap, 0.0);

	// Each of f0..f3 receives the same 72 inputs and produces one output texture value.
	t4[gxy] = f0(
		s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8,
		s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8,
		s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8,
		s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8,
		s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8,
		s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8,
		s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8,
		s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8);
	t5[gxy] = f1(
		s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8,
		s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8,
		s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8,
		s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8,
		s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8,
		s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8,
		s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8,
		s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8);
	t6[gxy] = f2(
		s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8,
		s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8,
		s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8,
		s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8,
		s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8,
		s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8,
		s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8,
		s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8);
	t7[gxy] = f3(
		s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8,
		s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8,
		s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8,
		s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8,
		s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8,
		s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8,
		s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8,
		s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8);
}

//!PASS 7
//!DESC conv6
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN t4, t5, t6, t7
//!OUT t0, t1, t2, t3

// Pass 7 input taps: lN(x, y) reads one of t4..t7 through the O() macro (point sampler SP,
// mip level 0, at `pos` plus the offset (x, y) scaled by `pt`) and converts the sample to V4.
// The expansion relies on `pos` and `pt` being defined at the call site.
#define l0(x, y) V4(O(t4, float2(x, y)))
#define l1(x, y) V4(O(t5, float2(x, y)))
#define l2(x, y) V4(O(t6, float2(x, y)))
#define l3(x, y) V4(O(t7, float2(x, y)))

V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) {
	V4 r = 0.0;
	r += mul(s0_0, M4(-4.959e-02, 2.300e-02, 6.317e-03, 1.703e-02, -2.688e-03, 1.642e-02, -2.200e-02, -2.420e-02, 2.363e-02, -1.022e-01, 6.023e-02, -2.609e-02, 9.291e-02, -1.160e-01, 2.052e-02, 2.354e-01));
	r += mul(s0_1, M4(-1.879e-02, -1.916e-02, -8.637e-03, 4.526e-02, 6.088e-02, -2.138e-03, -4.232e-02, -4.008e-02, -4.421e-02, 2.282e-01, 3.115e-02, -1.581e-02, -6.951e-03, -2.424e-02, 1.346e-01, -3.285e-02));
	r += mul(s0_2, M4(-4.787e-02, -1.360e-01, 5.257e-02, -4.066e-02, 2.153e-02, 2.471e-02, 3.004e-02, -7.448e-02, -1.941e-02, 1.677e-01, -1.904e-01, -2.967e-02, -7.226e-03, 8.123e-03, 1.559e-01, -1.188e-02));
	r += mul(s0_3, M4(-3.923e-02, -4.992e-02, 6.446e-02, 7.936e-02, -3.738e-02, -1.250e-02, 5.185e-02, 2.579e-02, 5.660e-02, -1.233e-01, 5.719e-02, 1.460e-02, 2.067e-01, -4.069e-02, 3.571e-02, 1.317e-01));
	r += mul(s0_4, M4(2.346e-02, -2.846e-01, -2.588e-02, 5.688e-02, -5.397e-03, -1.564e-02, -6.028e-02, 7.542e-02, -1.610e-01, 6.204e-02, 5.513e-02, 1.456e-02, -2.901e-01, 9.572e-02, 2.208e-02, -1.044e-01));
	r += mul(s0_5, M4(-9.583e-02, -2.630e-02, 4.636e-02, -1.747e-02, 5.710e-02, -3.728e-02, -9.599e-02, 2.775e-02, 9.363e-02, -1.286e-02, -6.437e-02, 8.923e-03, 5.191e-02, 2.725e-02, 5.111e-02, 5.944e-02));
	r += mul(s0_6, M4(3.916e-02, 1.242e-04, -2.263e-02, 1.843e-02, -2.212e-02, 1.383e-02, 4.635e-02, -8.325e-03, 8.122e-02, -1.123e-01, 1.783e-02, -7.211e-03, 5.749e-03, -1.679e-03, -5.085e-02, 6.039e-03));
	r += mul(s0_7, M4(-4.237e-02, 2.693e-02, 7.501e-03, -5.510e-02, 1.524e-02, -4.218e-02, -1.076e-02, -3.760e-02, 1.048e-01, -5.222e-02, 1.925e-02, -1.094e-02, 7.092e-03, -3.861e-02, 5.624e-02, 1.161e-02));
	r += mul(s0_8, M4(4.406e-03, 6.380e-02, 1.261e-02, -2.852e-02, 1.263e-02, -1.119e-02, -5.527e-02, -2.118e-02, 1.659e-02, 1.159e-01, -5.585e-02, -2.343e-02, 1.292e-02, -4.138e-02, 1.541e-02, 6.174e-02));
	r += mul(s1_0, M4(-1.596e-02, 1.235e-01, -1.475e-02, -1.215e-01, 4.439e-02, 2.252e-02, -1.036e-04, 2.227e-01, 2.471e-02, 1.859e-02, -1.305e-02, 2.673e-02, 1.234e-02, -7.778e-02, -6.012e-02, -3.814e-02));
	r += mul(s1_1, M4(-1.121e-01, 1.950e-01, 1.715e-01, 1.439e-02, 6.037e-02, -3.254e-01, -7.611e-02, -1.765e-01, 3.997e-02, 6.524e-02, 9.926e-02, 4.034e-02, -3.307e-02, -8.003e-02, -1.239e-02, 2.386e-02));
	r += mul(s1_2, M4(-6.216e-02, 1.513e-01, 1.518e-02, 1.468e-02, -5.238e-02, -1.184e-02, 9.673e-02, 8.954e-02, -3.878e-02, -3.462e-02, 1.696e-03, 4.314e-02, 3.306e-02, -2.129e-02, 3.722e-02, 9.545e-03));
	r += mul(s1_3, M4(-4.358e-02, 1.169e-01, -6.705e-02, 7.877e-02, 6.420e-02, 1.647e-02, -1.408e-01, -1.886e-02, -1.090e-02, -7.420e-02, 1.611e-02, -2.608e-02, 1.181e-02, 1.139e-02, -3.279e-02, -1.538e-01));
	r += mul(s1_4, M4(-7.269e-02, -9.624e-02, 2.104e-01, -3.678e-02, -8.138e-02, -7.530e-02, 1.403e-01, 1.101e-01, -2.562e-01, 1.725e-01, -2.215e-01, -1.774e-02, -9.527e-02, 2.512e-02, -1.014e-01, -6.340e-02));
	r += mul(s1_5, M4(9.204e-02, 1.466e-01, -8.662e-03, -7.309e-02, 1.940e-01, 1.822e-03, -1.982e-01, -4.872e-02, -9.534e-02, -3.194e-03, -2.147e-01, -5.603e-02, 4.305e-02, 4.031e-02, -1.879e-01, 1.564e-02));
	r += mul(s1_6, M4(-4.283e-02, 4.618e-02, -8.197e-02, -6.512e-02, -7.169e-02, -6.047e-02, 2.293e-02, 3.459e-02, -5.382e-02, -5.021e-02, 7.904e-02, -3.021e-02, 2.588e-03, -6.667e-02, 5.148e-02, 5.692e-02));
	r += mul(s1_7, M4(-8.603e-02, 1.255e-01, 2.339e-01, -3.309e-02, -5.634e-02, -1.763e-01, 9.410e-02, 1.203e-01, -3.287e-02, -4.261e-02, 1.248e-01, 7.539e-02, -5.734e-03, -3.458e-02, 8.259e-02, 8.584e-04));
	r += mul(s1_8, M4(9.075e-02, -4.847e-02, -4.681e-02, 1.470e-01, -1.874e-02, -1.161e-01, -1.934e-01, 3.652e-02, 4.413e-02, 4.896e-02, -1.374e-01, -6.084e-02, 8.472e-02, 6.868e-03, -5.092e-02, 2.943e-02));
	r += mul(s2_0, M4(-7.578e-02, 3.864e-02, 3.828e-02, 1.041e-01, -2.067e-02, -1.882e-02, 2.747e-03, 1.201e-02, 1.090e-02, 7.076e-04, 4.765e-03, -3.586e-02, 5.400e-02, -2.777e-02, 1.078e-02, 1.827e-02));
	r += mul(s2_1, M4(5.610e-02, -6.995e-03, 4.990e-03, 9.165e-02, -1.922e-02, 7.537e-02, 1.615e-02, -1.001e-01, -3.874e-02, -6.028e-03, 1.239e-01, 4.476e-02, -5.677e-02, 2.028e-01, 8.904e-02, -6.016e-02));
	r += mul(s2_2, M4(-3.735e-02, -1.418e-01, -7.564e-02, -2.437e-02, -1.181e-02, -3.410e-02, 7.301e-02, 8.895e-03, -3.613e-02, -3.974e-02, -3.959e-02, 1.515e-02, -1.539e-03, -2.772e-03, -3.304e-02, -3.478e-02));
	r += mul(s2_3, M4(-1.914e-02, -9.525e-02, 8.687e-02, 2.162e-02, -3.970e-02, -4.097e-02, -2.761e-02, -8.150e-02, 1.069e-02, 1.161e-02, 4.322e-02, 3.107e-02, -6.213e-02, 5.408e-04, -2.306e-03, -2.409e-02));
	r += mul(s2_4, M4(1.511e-01, 1.203e-01, -2.227e-01, -9.491e-02, -1.130e-01, 1.699e-01, -5.111e-02, 8.725e-02, 5.434e-02, -1.741e-01, 1.328e-02, 1.151e-01, 2.696e-02, 1.803e-01, 2.174e-02, 1.227e-01));
	r += mul(s2_5, M4(-1.622e-02, 2.244e-01, 2.659e-02, -2.045e-02, -5.316e-02, 2.989e-02, -3.311e-02, -1.226e-01, -3.029e-03, 6.464e-03, -1.315e-02, 2.236e-02, -6.947e-02, -5.856e-02, -7.343e-02, -1.114e-02));
	r += mul(s2_6, M4(-4.299e-02, 4.939e-03, 1.489e-01, 2.805e-02, 3.084e-02, 1.229e-02, -3.138e-02, 2.415e-02, 2.300e-02, -5.926e-02, 3.672e-02, -9.303e-02, -2.705e-02, -7.170e-02, 8.702e-02, 5.325e-02));
	r += mul(s2_7, M4(-1.679e-02, -1.060e-01, -6.970e-02, 6.048e-02, -1.964e-02, 2.604e-02, 1.357e-01, 2.298e-02, -1.148e-01, 5.353e-02, -8.100e-02, 2.667e-02, 1.939e-02, -3.578e-02, -1.812e-02, 3.766e-02));
	r += mul(s2_8, M4(-3.038e-04, -5.991e-03, -3.775e-02, -3.493e-02, 2.390e-02, -7.596e-02, 2.851e-02, 6.164e-02, 1.460e-02, -1.681e-03, 4.620e-02, -1.403e-02, 2.086e-02, 2.709e-02, -1.620e-02, 3.560e-02));
	r += mul(s3_0, M4(1.141e-04, 2.863e-02, 7.410e-02, 6.073e-02, -1.141e-01, 6.118e-02, -3.687e-03, 6.243e-02, -4.684e-03, 8.379e-02, 2.439e-02, -9.396e-02, 4.764e-04, -1.569e-02, 8.856e-02, 7.804e-03));
	r += mul(s3_1, M4(-2.494e-02, -2.837e-01, 5.573e-02, 1.198e-02, -4.818e-02, 2.223e-02, -9.465e-02, 5.685e-02, -2.476e-02, 2.298e-01, -5.739e-02, 1.791e-01, -4.102e-02, 9.728e-02, -5.806e-02, -3.767e-02));
	r += mul(s3_2, M4(9.027e-02, 2.405e-03, 1.261e-01, -1.430e-02, 6.552e-03, -5.556e-02, 1.538e-02, 1.255e-02, -5.140e-02, 1.569e-01, -1.936e-01, 1.306e-02, 4.347e-02, -1.850e-02, -1.557e-03, -1.791e-02));
	r += mul(s3_3, M4(9.246e-02, -1.177e-01, -7.298e-02, -1.532e-02, -1.054e-01, -5.527e-02, 5.620e-03, -8.630e-02, -1.256e-01, -7.708e-02, 6.246e-02, 2.278e-02, -1.151e-01, -2.338e-02, 4.332e-02, 4.785e-02));
	r += mul(s3_4, M4(5.673e-02, 6.712e-02, -1.401e-02, -1.203e-03, 3.747e-02, 4.443e-03, -4.435e-02, 5.732e-02, -1.563e-03, -7.244e-03, -1.148e-01, -1.453e-01, 1.674e-01, -9.521e-02, -3.138e-02, 1.018e-01));
	r += mul(s3_5, M4(-5.533e-02, 1.502e-01, 1.289e-01, -4.958e-02, -6.971e-02, -1.172e-01, 1.874e-01, 6.877e-03, 1.016e-02, 1.560e-01, -1.155e-01, 6.263e-03, -1.972e-02, 5.096e-03, 1.872e-01, -1.115e-01));
	r += mul(s3_6, M4(-2.659e-02, -4.438e-02, 3.411e-02, -4.316e-02, 2.489e-02, -5.362e-02, 6.324e-03, 2.250e-02, 6.458e-02, -1.310e-01, 1.392e-01, 5.710e-02, -2.336e-02, 3.723e-02, 2.933e-02, -9.192e-03));
	r += mul(s3_7, M4(-3.195e-02, 1.089e-02, 3.270e-02, 7.136e-02, 6.604e-03, -6.304e-02, 4.783e-02, 5.569e-02, -9.362e-02, -4.429e-02, -6.615e-02, 8.673e-02, 1.845e-01, -7.756e-03, -1.776e-01, -1.473e-02));
	r += mul(s3_8, M4(-1.581e-02, -5.631e-02, 1.651e-02, 3.415e-02, -7.547e-02, 5.109e-02, 2.791e-02, 8.264e-03, -2.090e-02, -1.188e-01, 5.602e-02, 8.052e-02, -2.345e-02, 6.393e-02, -1.052e-02, 5.089e-02));
	r += mul(s4_0, M4(2.126e-02, 6.163e-02, 5.698e-02, -4.189e-02, -1.033e-02, -8.728e-02, -5.533e-02, -3.495e-03, 9.104e-02, -4.146e-02, -3.394e-02, -3.003e-02, 9.144e-02, -2.309e-01, 1.130e-01, -1.458e-02));
	r += mul(s4_1, M4(1.959e-02, 1.240e-01, -2.060e-01, -8.081e-02, 7.705e-03, -2.860e-02, 6.000e-02, -5.123e-02, -3.581e-02, 1.002e-01, 8.903e-02, -5.170e-02, -2.747e-02, 9.524e-03, -5.792e-02, -6.941e-01));
	r += mul(s4_2, M4(4.583e-02, -6.945e-02, 8.239e-02, -9.996e-03, 9.288e-03, -2.209e-02, 3.012e-02, -2.720e-02, -6.338e-02, -1.081e-01, -1.522e-01, 1.295e-02, -4.195e-02, -1.774e-02, 9.414e-02, -1.140e-01));
	r += mul(s4_3, M4(-1.832e-02, 1.066e-01, 1.446e-01, 8.115e-02, -3.907e-02, -9.588e-03, 4.508e-02, -2.114e-02, 7.214e-03, 5.881e-02, -9.425e-03, 1.351e-02, -7.886e-03, -1.125e-02, 8.937e-02, -1.805e-01));
	r += mul(s4_4, M4(-1.545e-01, 3.279e-02, -7.314e-02, 3.145e-02, -2.324e-01, 2.880e-02, -4.514e-02, -7.526e-02, -3.248e-02, 1.011e-01, 1.070e-01, 9.826e-02, -6.378e-01, -1.113e-01, -5.476e-03, -6.405e-01));
	r += mul(s4_5, M4(4.225e-02, 4.995e-02, -5.151e-02, -6.658e-02, -3.270e-03, 2.154e-02, 4.740e-02, 3.039e-03, -1.839e-02, -1.744e-01, -1.593e-01, 6.823e-02, -1.242e-01, -1.483e-02, -1.341e-01, -2.339e-01));
	r += mul(s4_6, M4(1.194e-02, -7.981e-02, 6.914e-02, 3.322e-02, 5.713e-02, -4.931e-02, -3.948e-02, -2.579e-03, -4.410e-02, 6.070e-02, 1.195e-03, -3.376e-02, 1.641e-02, -5.587e-02, -4.053e-02, -1.630e-02));
	r += mul(s4_7, M4(4.789e-02, -4.548e-02, 3.627e-02, 7.645e-02, -4.072e-02, 1.233e-01, 2.409e-02, 2.719e-02, 1.063e-01, 6.620e-03, 5.710e-02, 2.887e-02, -1.481e-02, 1.618e-02, -4.501e-02, -7.504e-02));
	r += mul(s4_8, M4(3.256e-02, -6.711e-02, -7.155e-02, 1.077e-03, 1.179e-01, 1.927e-02, -7.517e-02, -5.050e-03, -1.652e-02, 2.988e-02, -6.473e-02, -2.233e-02, 1.078e-01, 1.606e-02, 1.051e-01, 1.858e-02));
	r += mul(s5_0, M4(1.185e-02, -8.303e-02, 7.525e-02, -3.948e-02, -2.980e-02, -5.279e-03, 1.521e-02, -3.329e-02, 3.512e-02, -4.894e-03, 1.869e-03, -9.467e-02, 4.547e-02, -1.113e-01, -4.921e-04, -3.650e-02));
	r += mul(s5_1, M4(-2.533e-02, -5.457e-02, 6.664e-02, -1.109e-01, 2.634e-03, -3.898e-02, -1.115e-01, -9.777e-02, 4.398e-03, 2.218e-01, -8.076e-02, -8.298e-03, -1.466e-03, 6.349e-02, -9.325e-02, -7.098e-02));
	r += mul(s5_2, M4(-1.490e-02, -1.382e-01, 6.412e-02, 4.542e-02, 2.084e-02, 3.895e-02, -1.326e-01, -2.134e-02, 5.695e-02, 1.803e-01, -6.086e-02, -8.669e-02, 3.222e-02, -8.005e-02, 6.315e-02, -7.551e-02));
	r += mul(s5_3, M4(-1.885e-03, 9.724e-02, -8.784e-02, 1.272e-02, -5.185e-02, -1.148e-02, -4.775e-02, 1.111e-02, -3.767e-02, -4.852e-02, 1.005e-01, 3.394e-02, -2.777e-02, 1.755e-02, 2.754e-02, -5.608e-02));
	r += mul(s5_4, M4(-8.003e-02, -3.442e-02, -1.281e-03, -5.182e-03, -3.213e-01, -1.581e-02, 6.228e-02, -1.903e-01, -1.459e-01, -1.218e-01, 2.070e-01, 4.157e-01, -1.788e-01, 1.294e-01, -6.393e-02, -4.187e-02));
	r += mul(s5_5, M4(-3.751e-02, 6.109e-02, 4.843e-02, -1.268e-02, -1.034e-01, -6.947e-02, -2.445e-02, -4.195e-02, -1.025e-01, -2.431e-01, -2.601e-01, 1.780e-01, -6.830e-02, -6.775e-02, -2.807e-02, -7.518e-02));
	r += mul(s5_6, M4(6.528e-03, -7.250e-02, 1.841e-02, 2.886e-02, 1.267e-02, 9.469e-03, -2.174e-01, -1.515e-02, -3.808e-02, 3.925e-02, -5.356e-02, -4.861e-02, 6.533e-03, 1.848e-02, 4.500e-04, -4.908e-02));
	r += mul(s5_7, M4(-4.713e-02, -4.756e-02, 3.375e-02, 4.234e-02, -2.338e-01, 1.421e-01, 4.348e-02, -1.375e-01, 7.462e-02, 7.048e-02, -7.856e-02, -1.155e-01, 3.095e-02, 4.129e-03, -2.738e-02, -2.082e-02));
	r += mul(s5_8, M4(1.326e-02, -8.204e-02, -7.037e-02, 5.335e-02, -5.314e-03, 7.717e-02, 2.884e-02, -8.406e-02, 9.963e-03, 1.188e-01, -2.833e-02, 1.061e-02, 1.649e-02, 6.136e-03, 2.692e-02, -6.118e-02));
	r += mul(s6_0, M4(-1.225e-01, -3.199e-02, -2.469e-02, -3.257e-01, 2.266e-03, 5.483e-02, -2.814e-02, 4.752e-02, 4.902e-02, 8.969e-02, -1.586e-02, 4.620e-02, 4.357e-03, 7.414e-02, -4.872e-02, -4.663e-02));
	r += mul(s6_1, M4(-4.936e-02, 1.609e-01, -1.327e-01, -1.711e-01, 2.718e-02, 8.757e-03, -4.149e-02, 4.596e-03, -2.391e-02, 1.315e-02, 1.624e-01, 1.233e-01, 1.183e-01, 1.777e-01, -2.123e-02, 4.965e-02));
	r += mul(s6_2, M4(-1.488e-02, 2.607e-02, -2.137e-01, -9.055e-02, 1.082e-01, -7.058e-03, -8.666e-02, 4.212e-02, -9.235e-03, -9.004e-02, 1.824e-01, 3.037e-02, 3.261e-02, 1.020e-01, -2.231e-02, -1.477e-02));
	r += mul(s6_3, M4(-1.582e-01, -7.736e-04, 1.119e-01, -2.861e-01, 1.220e-01, -1.340e-01, -6.330e-02, 6.531e-02, 1.245e-01, 5.890e-02, -6.131e-02, 5.569e-02, -1.808e-01, 6.471e-02, 2.407e-02, -9.163e-02));
	r += mul(s6_4, M4(5.294e-02, 2.264e-02, 9.260e-02, 2.702e-03, 6.289e-02, -1.609e-01, -1.517e-02, -9.012e-02, 9.663e-02, 8.651e-02, -7.378e-02, -2.462e-01, 7.932e-03, 5.327e-02, -6.168e-02, -2.111e-02));
	r += mul(s6_5, M4(-5.505e-02, 1.211e-01, 2.485e-02, 6.687e-05, 2.232e-02, 4.068e-02, -8.433e-02, -5.399e-02, 1.062e-01, 4.350e-02, 1.213e-01, -1.309e-03, 4.683e-02, 5.696e-02, -2.095e-01, 2.013e-02));
	r += mul(s6_6, M4(-6.952e-02, 8.277e-02, 6.191e-02, -1.102e-01, 6.552e-02, -1.717e-02, -4.441e-02, 3.744e-02, -1.211e-03, 2.891e-02, 2.197e-02, 3.364e-02, 7.535e-03, -6.770e-03, -6.397e-02, -1.047e-01));
	r += mul(s6_7, M4(-1.045e-02, 1.324e-01, -6.323e-03, -1.891e-01, -1.024e-01, 8.601e-03, -3.786e-02, -4.529e-02, -1.346e-01, -2.908e-02, -1.530e-03, -5.581e-02, -5.267e-02, 4.319e-02, 6.115e-04, -4.299e-02));
	r += mul(s6_8, M4(-1.878e-02, 2.048e-01, -4.330e-02, -4.895e-02, -1.007e-02, 1.237e-02, -2.172e-02, -8.858e-03, 1.403e-02, -6.982e-02, 1.142e-01, -2.795e-03, 6.235e-02, 2.970e-02, -8.211e-02, 3.042e-02));
	r += mul(s7_0, M4(-5.024e-02, -3.928e-02, 8.248e-03, 3.024e-02, 5.021e-02, -2.346e-02, 6.404e-02, 1.671e-01, 4.870e-02, 4.704e-03, -2.799e-02, -2.238e-02, 3.293e-02, -5.885e-02, 8.025e-02, 8.911e-03));
	r += mul(s7_1, M4(4.734e-02, -5.602e-02, -8.165e-02, 8.589e-02, -6.049e-02, -2.462e-01, 9.227e-03, -2.058e-01, -2.801e-02, 1.579e-01, 3.732e-02, -4.812e-02, 1.353e-01, -3.953e-02, -9.637e-02, 8.001e-02));
	r += mul(s7_2, M4(-3.973e-03, -1.591e-02, 1.366e-03, 4.087e-02, 5.343e-02, -5.346e-02, -5.055e-02, 2.863e-02, 5.103e-02, 1.944e-02, 6.019e-02, -8.776e-02, 3.405e-02, 2.922e-02, 1.362e-01, -7.645e-03));
	r += mul(s7_3, M4(-4.861e-04, -4.012e-02, 4.286e-02, -4.906e-03, 1.627e-01, -1.097e-01, -1.243e-01, -6.100e-02, 6.076e-02, -1.064e-02, 2.648e-02, 6.042e-02, -7.793e-02, -1.243e-02, 6.117e-02, -5.637e-02));
	r += mul(s7_4, M4(2.827e-02, 3.054e-02, 1.967e-02, 1.132e-01, 1.891e-01, -1.120e-01, 6.793e-02, 1.781e-01, -6.503e-02, 3.875e-02, -1.368e-01, -6.066e-02, 8.103e-02, 4.358e-02, -1.609e-01, 3.028e-02));
	r += mul(s7_5, M4(-2.243e-02, 6.646e-03, 4.602e-02, -8.031e-02, -2.034e-01, 2.752e-02, 8.253e-02, 5.569e-03, 1.200e-01, 1.976e-02, 1.460e-01, 1.773e-01, 5.920e-02, -8.765e-02, -3.917e-02, 2.605e-02));
	r += mul(s7_6, M4(3.516e-02, -1.987e-02, -3.543e-02, 5.867e-02, -3.802e-02, 5.770e-02, 1.447e-01, 2.270e-02, 1.202e-02, 6.453e-02, -6.165e-03, -1.177e-02, 9.626e-02, -5.555e-02, -9.464e-02, -6.232e-02));
	r += mul(s7_7, M4(-8.539e-04, -4.631e-02, 5.124e-02, 2.576e-02, -7.279e-02, 1.385e-01, -1.287e-01, -5.966e-02, -9.662e-03, 5.361e-02, -5.509e-03, -2.013e-02, 6.722e-02, -7.907e-02, -6.298e-02, -6.806e-02));
	r += mul(s7_8, M4(-1.113e-02, -1.198e-02, 6.223e-03, -3.262e-04, 2.197e-02, 1.376e-01, -2.326e-02, -9.016e-02, 2.050e-02, 3.226e-02, 3.427e-02, -4.716e-02, 7.373e-02, 5.725e-02, 5.104e-02, 5.237e-02));
	r += V4(2.325e-03, -1.910e-04, -2.453e-02, -3.947e-02);
	return r;
}

// f1: builds one 4-component result as the sum of 72 vector-by-matrix products plus a constant.
//
// Parameters: 72 V4 (min16float4) values named sK_J, with K = 0..7 and J = 0..8.
// Each argument is passed as the first operand of mul() against its own hard-coded
// M4 (min16float4x4) constant, so the argument acts as a row vector.
// Returns: the accumulated V4 after the final constant vector has been added.
//
// NOTE(review): presumably K selects one of eight feature inputs and J one of nine
// spatial taps, with the literals being trained weights - confirm against the caller
// and the generator that emitted this file.
// The products are summed in the listed order in min16float precision; reordering the
// statements may change rounding, so the sequence is intentionally left untouched.
V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) {
	// Accumulator starts at zero; every line below adds one product to it.
	V4 r = 0.0;
	r += mul(s0_0, M4(1.452e-02, -2.213e-02, 3.418e-02, -1.868e-03, 5.112e-02, 2.362e-02, 6.041e-02, 2.931e-02, 3.201e-02, -6.846e-02, 1.007e-02, -1.945e-02, -1.491e-01, -1.026e-01, -6.356e-02, 8.083e-03));
	r += mul(s0_1, M4(-3.302e-02, 1.039e-02, -7.105e-02, 4.512e-02, 1.699e-02, -4.399e-02, -1.129e-01, -3.180e-03, 4.396e-02, -2.110e-02, -2.890e-02, -1.940e-02, 1.083e-01, 1.618e-03, -4.433e-02, 2.564e-02));
	r += mul(s0_2, M4(-2.591e-02, 4.649e-03, -1.796e-04, -1.397e-04, 3.523e-03, 3.807e-03, -2.256e-02, -2.057e-02, 2.152e-02, -4.283e-02, -4.498e-03, 5.868e-02, 8.216e-02, -1.374e-02, -1.599e-02, 1.185e-01));
	r += mul(s0_3, M4(-2.138e-02, -2.510e-02, 3.635e-03, 5.926e-02, 1.948e-02, -1.106e-02, 2.721e-02, -6.293e-03, -1.257e-01, 3.189e-02, -2.102e-02, -1.052e-01, -1.488e-01, -2.395e-02, -1.034e-01, 4.175e-02));
	r += mul(s0_4, M4(3.801e-02, -5.686e-02, -2.110e-02, 8.493e-02, -4.898e-02, -1.377e-01, 2.817e-02, -1.992e-01, 2.107e-01, 2.706e-01, -6.196e-02, -1.218e-01, 1.192e-01, 1.155e-01, -1.004e-01, 1.263e-01));
	r += mul(s0_5, M4(6.634e-02, -6.558e-02, -5.310e-02, 8.774e-02, -1.082e-01, -1.092e-01, 9.039e-02, 4.413e-02, -1.282e-01, 4.255e-02, 2.607e-02, 3.934e-02, -3.736e-02, -3.743e-02, -7.450e-02, 5.096e-02));
	r += mul(s0_6, M4(-3.132e-02, 1.058e-01, 5.335e-02, 3.218e-02, -8.362e-03, -1.239e-02, 7.032e-02, -2.217e-02, -9.698e-02, -2.439e-02, -2.590e-02, -1.107e-01, -1.166e-02, 1.432e-03, -2.418e-02, -4.755e-02));
	r += mul(s0_7, M4(1.094e-02, -5.756e-02, -1.564e-02, -5.575e-02, -5.384e-02, 5.222e-03, -1.447e-02, 3.893e-02, -1.928e-01, 3.496e-02, 2.032e-01, 5.912e-02, 6.310e-02, 1.259e-02, -1.224e-02, 8.429e-02));
	r += mul(s0_8, M4(-6.143e-02, 8.296e-02, 3.608e-02, -6.786e-03, -8.912e-02, 7.147e-02, 3.735e-03, -1.193e-02, 6.371e-02, -1.715e-03, 6.778e-02, 4.096e-02, -2.178e-03, -2.116e-02, 3.634e-02, 2.353e-02));
	r += mul(s1_0, M4(3.726e-02, -2.700e-02, 5.479e-02, 2.802e-02, -4.521e-02, 6.023e-02, 1.441e-01, -3.327e-02, 7.922e-03, 9.608e-02, 6.132e-02, 6.058e-02, -1.181e-01, 1.375e-02, 3.535e-02, 1.659e-02));
	r += mul(s1_1, M4(9.419e-02, 4.554e-02, -1.226e-02, 3.050e-02, 1.130e-01, -5.704e-02, 4.752e-02, -1.253e-01, -5.118e-02, 4.104e-02, -6.749e-02, -5.343e-02, 5.244e-02, 6.944e-02, 1.140e-01, -9.239e-02));
	r += mul(s1_2, M4(-4.040e-02, 5.244e-02, 2.012e-02, 5.836e-02, 4.047e-02, 3.844e-02, 2.243e-01, -2.281e-02, 6.051e-02, -4.283e-02, 8.320e-02, 4.852e-02, -6.591e-03, 1.960e-02, 1.263e-01, -3.942e-02));
	r += mul(s1_3, M4(2.830e-02, 8.144e-02, -3.719e-02, -6.285e-02, -8.031e-02, -2.416e-02, -1.323e-01, -4.774e-02, 2.482e-03, 1.067e-01, 8.261e-02, 7.856e-02, -4.436e-02, 6.318e-02, -2.072e-02, 4.913e-03));
	r += mul(s1_4, M4(1.982e-02, 1.753e-01, -5.781e-02, 3.787e-01, -9.268e-02, -4.545e-01, 2.008e-01, -1.498e-02, 3.232e-02, -4.611e-02, -3.575e-01, -1.095e-01, 1.653e-02, -1.507e-02, -1.033e-03, -9.718e-02));
	r += mul(s1_5, M4(-2.268e-01, -9.784e-02, -1.059e-01, -3.203e-02, 5.056e-02, 1.258e-01, 4.908e-02, -2.078e-01, 9.348e-02, 1.395e-01, -1.733e-01, 7.156e-02, -1.265e-01, 8.872e-03, -9.426e-02, -9.058e-02));
	r += mul(s1_6, M4(-4.677e-02, -2.115e-02, -1.070e-02, 7.677e-02, 1.224e-02, -8.576e-02, 9.551e-02, -6.854e-02, 7.423e-02, 2.429e-02, 5.618e-02, -2.685e-02, -9.624e-02, -6.545e-02, 4.498e-02, -3.158e-02));
	r += mul(s1_7, M4(-1.134e-01, 8.718e-02, -1.208e-01, -1.686e-02, 1.400e-01, -3.213e-01, 9.984e-02, 1.925e-01, 9.047e-02, -7.602e-02, -1.353e-01, 3.362e-02, 3.519e-03, -7.485e-02, -4.753e-03, 3.789e-02));
	r += mul(s1_8, M4(-1.636e-01, -5.761e-02, 8.762e-02, 3.048e-02, 8.438e-02, 3.496e-02, 8.871e-02, -1.531e-01, 2.154e-02, 2.129e-02, -1.388e-01, -1.133e-02, -9.511e-02, -4.549e-02, -1.384e-02, -5.895e-02));
	r += mul(s2_0, M4(4.882e-03, 1.283e-01, -2.763e-02, 9.043e-02, -2.804e-03, -5.500e-03, 9.404e-02, 2.569e-02, 3.186e-02, 5.725e-02, -2.883e-02, -8.733e-02, -5.914e-02, 7.541e-02, -2.237e-03, 1.062e-02));
	r += mul(s2_1, M4(2.729e-02, 1.656e-02, 4.354e-02, 6.795e-02, -6.394e-02, 6.677e-04, -9.367e-02, -7.237e-03, 1.514e-03, -6.692e-02, 5.455e-02, -8.540e-03, 4.207e-02, 1.632e-01, 5.576e-02, -2.645e-02));
	r += mul(s2_2, M4(-1.607e-02, -2.096e-02, -1.031e-01, -1.291e-02, 7.235e-02, -3.595e-04, -1.461e-03, 1.701e-02, -1.732e-03, 8.503e-02, -1.746e-03, 1.168e-03, 8.124e-02, -3.384e-02, -1.580e-02, 9.406e-02));
	r += mul(s2_3, M4(-4.748e-02, 2.919e-02, 1.506e-01, 1.201e-01, 3.544e-03, 6.126e-02, 1.094e-01, 7.174e-02, -3.791e-02, 1.865e-01, -6.470e-02, -1.531e-01, 1.565e-02, 3.920e-02, 4.791e-02, 4.874e-02));
	r += mul(s2_4, M4(-6.980e-02, -3.444e-02, -7.843e-02, -7.325e-02, 1.878e-01, -9.045e-03, -8.041e-02, -2.307e-02, -5.940e-02, -3.866e-02, 6.295e-02, 1.268e-01, 1.971e-02, 1.880e-01, -7.046e-02, 7.267e-02));
	r += mul(s2_5, M4(1.551e-01, -1.490e-01, 4.663e-02, 2.092e-03, 4.003e-02, -9.856e-03, -1.524e-01, 8.802e-02, -6.113e-02, 1.032e-01, -5.811e-02, -1.167e-01, -1.040e-01, -1.417e-02, -7.664e-02, 6.155e-02));
	r += mul(s2_6, M4(9.057e-02, 1.014e-01, -9.122e-03, -1.784e-02, 3.977e-02, 4.477e-02, 4.628e-02, 2.379e-02, -1.242e-02, 6.113e-02, -9.431e-03, -5.879e-02, 2.596e-02, -2.014e-02, 3.798e-02, 2.791e-02));
	r += mul(s2_7, M4(1.446e-02, -1.929e-02, 2.062e-02, -5.054e-02, 8.961e-02, -2.919e-02, -3.591e-02, -4.359e-05, 8.416e-02, -9.472e-03, -6.747e-02, -4.886e-02, 4.601e-02, 2.285e-01, -4.229e-02, -1.883e-02));
	r += mul(s2_8, M4(2.301e-02, -8.001e-03, -3.776e-02, -4.969e-02, -6.737e-02, -7.542e-02, 9.241e-02, 4.902e-05, 1.421e-02, 1.136e-01, -1.230e-01, -2.179e-02, -9.226e-02, 1.140e-01, 5.157e-03, -4.170e-02));
	r += mul(s3_0, M4(5.915e-02, 1.352e-01, -6.930e-02, -3.117e-02, 6.114e-02, -1.653e-01, 1.082e-01, 8.015e-02, 3.380e-02, 3.502e-03, 4.469e-02, 1.862e-02, 6.960e-03, 9.355e-03, 5.013e-02, 9.937e-02));
	r += mul(s3_1, M4(-1.468e-01, 4.873e-02, 1.114e-01, -5.181e-03, 5.865e-02, -9.539e-02, -8.387e-02, 5.829e-02, 6.005e-02, -1.361e-01, -8.120e-02, -8.009e-02, 1.379e-01, -1.110e-01, -3.764e-02, 7.363e-02));
	r += mul(s3_2, M4(-1.323e-02, 1.469e-01, 4.694e-02, 1.735e-02, 3.762e-02, -9.231e-02, -7.239e-02, 1.063e-02, 4.043e-02, 4.433e-02, -5.364e-02, -3.767e-02, -3.601e-02, -6.791e-02, 7.434e-03, 8.255e-02));
	r += mul(s3_3, M4(-3.209e-02, 8.216e-02, 1.743e-01, -5.094e-02, 1.137e-01, -2.192e-01, -4.207e-02, 8.447e-03, -1.389e-02, 1.533e-01, 6.047e-03, -6.254e-03, -1.571e-02, 1.528e-02, 4.149e-02, 3.927e-02));
	r += mul(s3_4, M4(1.320e-01, -2.930e-02, -1.799e-02, 2.068e-01, 4.463e-02, -1.153e-03, -1.415e-02, -1.850e-01, 2.495e-01, 4.801e-02, -2.517e-01, 1.030e-01, -3.563e-01, 2.625e-01, 3.427e-02, 5.950e-02));
	r += mul(s3_5, M4(-7.095e-02, 5.167e-02, 1.356e-02, -5.700e-02, 1.530e-01, -1.915e-01, -1.541e-02, 1.587e-01, -3.247e-02, 1.876e-01, -2.764e-01, -2.076e-01, 3.612e-02, 1.690e-01, 8.827e-02, 6.959e-02));
	r += mul(s3_6, M4(-5.147e-02, 4.759e-02, 3.091e-02, 1.571e-02, -4.751e-02, 2.807e-02, -1.546e-02, -2.250e-02, -3.121e-03, 1.554e-02, -8.659e-02, -1.952e-01, 4.762e-04, -3.163e-02, -8.135e-02, 6.519e-02));
	r += mul(s3_7, M4(-2.127e-02, -1.332e-02, 2.117e-02, 1.499e-02, 4.184e-03, 6.348e-03, 4.794e-02, -4.764e-03, -1.938e-01, 3.070e-02, 5.067e-02, -1.063e-01, -7.583e-02, 9.846e-02, -5.883e-02, 1.245e-02));
	r += mul(s3_8, M4(-7.546e-02, -6.857e-02, 4.869e-02, 6.144e-04, 2.149e-02, -1.723e-01, 3.948e-02, 1.033e-01, -9.994e-02, -1.799e-02, -4.577e-02, 1.503e-02, 1.917e-02, 2.287e-02, -3.749e-02, 3.704e-03));
	r += mul(s4_0, M4(2.386e-02, -6.699e-02, 1.179e-01, 1.217e-01, -3.840e-03, -9.233e-02, -5.570e-02, -4.940e-02, -3.374e-02, 1.446e-02, -3.170e-02, -2.923e-02, 6.858e-03, 1.429e-02, -1.063e-01, 3.806e-02));
	r += mul(s4_1, M4(6.160e-02, 4.078e-02, -9.278e-02, 8.813e-03, 4.014e-02, 8.360e-02, 1.578e-01, 2.356e-02, 1.554e-02, -1.546e-02, 1.870e-01, -2.622e-02, 1.007e-01, -2.018e-02, 3.210e-02, -1.491e-01));
	r += mul(s4_2, M4(4.191e-02, 8.331e-02, 7.167e-02, -2.731e-02, 2.408e-03, -9.919e-03, -1.365e-03, -9.990e-03, -6.103e-02, -6.417e-02, -4.156e-02, 4.083e-02, 1.673e-01, 1.464e-02, -5.283e-02, -1.810e-02));
	r += mul(s4_3, M4(1.847e-01, -5.065e-02, 1.175e-01, 1.171e-01, 1.287e-02, -1.970e-01, 1.837e-02, 9.569e-02, -5.157e-02, 1.203e-01, 8.410e-02, -6.370e-02, 6.671e-02, 1.668e-01, 3.538e-02, 6.047e-02));
	r += mul(s4_4, M4(-1.742e-01, 1.513e-01, 1.198e-01, 3.599e-02, -5.566e-02, 9.012e-02, -1.984e-01, -2.470e-02, 1.116e-01, 3.129e-01, -8.667e-02, 1.346e-01, 1.939e-01, 2.393e-02, -9.264e-02, -2.492e-01));
	r += mul(s4_5, M4(4.079e-02, 7.863e-02, 3.043e-02, 8.451e-02, 7.184e-02, 9.325e-02, -3.495e-02, 6.198e-02, -8.641e-02, -2.968e-02, 1.519e-01, -1.383e-01, -5.764e-02, -1.684e-01, -7.344e-02, -1.298e-01));
	r += mul(s4_6, M4(2.949e-02, 5.492e-02, -6.664e-02, -1.867e-02, -2.591e-02, -2.480e-02, 2.903e-02, 1.387e-01, 5.483e-02, -1.129e-01, 1.877e-02, 7.596e-03, -1.058e-01, -1.558e-01, 9.410e-02, 6.218e-02));
	r += mul(s4_7, M4(7.291e-02, -9.164e-02, -2.596e-02, 1.246e-01, -1.701e-02, -1.238e-01, -8.126e-02, -1.795e-02, -6.595e-02, 1.063e-01, 2.183e-02, 5.273e-02, 1.348e-02, -5.259e-02, 1.109e-01, -1.108e-01));
	r += mul(s4_8, M4(-8.273e-02, -2.826e-02, 5.756e-02, -6.913e-02, -9.518e-02, 3.518e-02, -4.045e-02, -1.886e-03, -5.751e-03, 1.910e-02, 8.394e-03, -5.276e-02, 5.724e-02, 9.454e-04, 3.042e-02, 1.589e-03));
	r += mul(s5_0, M4(-4.081e-02, 2.613e-02, -1.001e-02, -7.691e-02, 6.423e-03, -7.491e-03, 7.306e-02, 5.021e-02, 5.762e-02, -7.077e-02, -6.219e-02, 1.087e-01, 4.356e-02, -1.110e-02, 2.045e-02, -4.486e-02));
	r += mul(s5_1, M4(-8.378e-02, 9.047e-02, 7.844e-02, -1.880e-01, 9.810e-02, 1.244e-01, 4.427e-02, 8.744e-02, 9.155e-02, 9.292e-02, 8.227e-02, 8.444e-02, -1.160e-02, 2.979e-02, 5.415e-02, -1.836e-02));
	r += mul(s5_2, M4(9.188e-02, 4.463e-02, 3.819e-02, 2.323e-02, -8.297e-03, -5.695e-02, -3.495e-02, -6.575e-03, -2.870e-02, 3.126e-02, -7.547e-02, 4.411e-02, 5.214e-03, 4.028e-02, -2.259e-02, 3.480e-02));
	r += mul(s5_3, M4(-3.300e-02, 4.670e-02, -6.394e-02, -7.687e-03, 8.160e-02, -1.323e-01, -3.512e-02, 6.628e-02, 5.705e-03, 2.856e-03, 9.416e-02, -2.114e-02, 2.401e-02, 9.146e-03, 7.629e-03, -6.669e-02));
	r += mul(s5_4, M4(-1.094e-01, 2.638e-01, 3.507e-02, 5.298e-03, -1.671e-01, 1.657e-01, 4.612e-02, -6.475e-02, 3.626e-01, -6.742e-02, 2.930e-01, 1.344e-01, -9.630e-02, -5.689e-02, -4.070e-02, -1.976e-01));
	r += mul(s5_5, M4(5.550e-02, 7.444e-02, 6.249e-02, 9.736e-02, -9.858e-02, -3.331e-02, -3.191e-02, -1.121e-02, -2.269e-01, -2.242e-01, -6.816e-04, -3.120e-01, -1.583e-02, 8.640e-03, -3.154e-02, -1.502e-01));
	r += mul(s5_6, M4(3.199e-02, 6.824e-02, -5.314e-02, 6.801e-03, -1.462e-01, 7.690e-02, 5.163e-02, 3.599e-02, 1.236e-02, -5.421e-02, 2.565e-02, -3.847e-03, -3.625e-02, 4.862e-02, -1.461e-02, 1.887e-02));
	r += mul(s5_7, M4(1.903e-02, 1.049e-01, -9.422e-02, 7.068e-03, 7.797e-02, -2.580e-02, -2.133e-02, -8.385e-02, 2.896e-01, -7.134e-02, -3.428e-02, 3.129e-02, -8.228e-02, 6.240e-02, 1.937e-02, -5.818e-02));
	r += mul(s5_8, M4(2.743e-02, -1.725e-02, 2.877e-02, 1.920e-02, -9.789e-02, -9.666e-02, -1.232e-02, -1.358e-01, -5.698e-02, -6.617e-02, -2.075e-01, -9.234e-02, 7.224e-02, 5.932e-02, 3.815e-02, 5.836e-03));
	r += mul(s6_0, M4(1.294e-01, -1.894e-01, 1.271e-02, -3.241e-02, 6.861e-02, 7.944e-03, -6.318e-02, 7.642e-02, -1.590e-03, 2.999e-02, -1.520e-01, -3.941e-02, 8.424e-02, 1.252e-01, 3.210e-02, 3.888e-02));
	r += mul(s6_1, M4(7.113e-03, -3.657e-01, -5.681e-02, -2.579e-02, 3.934e-03, -6.467e-02, -4.227e-02, -1.791e-02, -4.824e-02, -4.570e-02, 1.281e-01, -1.851e-01, -7.617e-02, -2.520e-02, 4.820e-02, -3.522e-02));
	r += mul(s6_2, M4(7.420e-02, -1.187e-01, 1.931e-02, -7.240e-02, 1.547e-02, 1.421e-02, -2.403e-02, 5.392e-02, -2.654e-02, -3.127e-02, 5.019e-02, -9.106e-03, -2.130e-03, 7.296e-02, -1.444e-02, 4.740e-02));
	r += mul(s6_3, M4(5.434e-02, -9.763e-02, -1.274e-01, 3.995e-02, -4.936e-03, -9.896e-02, -1.038e-01, -9.221e-03, 1.122e-02, 7.027e-02, -1.762e-01, 9.409e-03, -1.257e-02, -4.714e-02, 1.009e-01, -5.563e-02));
	r += mul(s6_4, M4(2.441e-01, -1.990e-01, 8.441e-02, 1.368e-01, 9.119e-02, 6.636e-03, -4.092e-02, 3.751e-02, 8.522e-02, 2.960e-02, -5.376e-02, -1.098e-01, 1.705e-01, -1.020e-01, -8.451e-02, 1.714e-01));
	r += mul(s6_5, M4(1.497e-01, -7.163e-02, -7.517e-02, 1.158e-01, -4.815e-02, 4.708e-02, -6.679e-03, -1.606e-02, 6.199e-02, -6.321e-02, -1.431e-02, -8.619e-02, -1.887e-01, 1.032e-01, -1.129e-01, -5.719e-02));
	r += mul(s6_6, M4(1.094e-01, -9.081e-02, -1.038e-01, -3.552e-02, -5.055e-03, -7.073e-02, -3.967e-02, 1.570e-02, -4.736e-02, -9.455e-02, 1.999e-02, 8.959e-03, 1.274e-02, 9.704e-02, 3.143e-02, -6.793e-03));
	r += mul(s6_7, M4(2.531e-01, -6.100e-02, -2.354e-01, -6.290e-02, -4.297e-02, 2.630e-03, 1.902e-02, -4.228e-02, -5.898e-02, 3.427e-02, 2.461e-02, -7.373e-02, 1.105e-01, -1.016e-01, 1.337e-02, -7.272e-02));
	r += mul(s6_8, M4(9.801e-02, -5.510e-02, -1.810e-01, -5.994e-02, 7.279e-02, -1.181e-02, 6.652e-02, 2.043e-03, 7.534e-02, -1.073e-01, 1.965e-02, -4.311e-02, -1.470e-01, 4.022e-02, 1.256e-02, -1.590e-01));
	r += mul(s7_0, M4(2.198e-03, 1.195e-02, 4.987e-02, 2.487e-02, -3.843e-02, 6.466e-02, 6.872e-03, -4.453e-02, -4.167e-02, 3.627e-02, 9.219e-03, -1.346e-02, 7.166e-02, 2.580e-02, -2.460e-03, 1.006e-01));
	r += mul(s7_1, M4(-1.733e-02, -1.171e-02, -9.107e-02, 7.854e-02, 5.693e-02, -1.864e-02, 1.852e-02, 5.034e-02, 7.212e-02, 3.279e-02, 7.340e-03, -1.160e-02, 1.476e-02, -1.143e-01, 5.309e-02, 8.046e-02));
	r += mul(s7_2, M4(1.709e-03, -9.215e-03, 3.640e-02, -2.015e-02, -7.813e-02, -2.006e-02, 5.107e-03, -3.323e-02, 3.283e-03, 1.799e-01, -6.751e-02, -3.501e-02, 5.822e-02, -1.271e-02, -1.269e-02, 3.323e-02));
	r += mul(s7_3, M4(-4.981e-02, 1.224e-01, 9.955e-03, 4.094e-02, -1.480e-01, -1.458e-01, -1.077e-01, 4.140e-02, 6.963e-02, -9.296e-03, -7.067e-02, 6.662e-02, -1.469e-01, -7.788e-02, 3.749e-02, -6.122e-02));
	r += mul(s7_4, M4(8.569e-02, -3.485e-03, 6.274e-02, 4.062e-02, -8.150e-02, -2.033e-02, -4.892e-02, -3.897e-02, -7.900e-02, 3.007e-02, -7.428e-02, -9.108e-02, 1.780e-01, -1.877e-01, -8.340e-02, 2.277e-01));
	r += mul(s7_5, M4(-2.498e-03, 4.742e-02, -1.618e-02, 5.209e-02, 7.432e-02, -5.888e-02, 5.732e-02, 2.196e-02, 2.111e-02, -1.725e-01, 5.155e-02, -7.693e-03, -1.316e-01, 6.684e-04, 3.329e-02, 4.710e-02));
	r += mul(s7_6, M4(-3.028e-03, -1.294e-02, 7.406e-03, 1.783e-02, 6.747e-03, 1.236e-02, 5.246e-02, 1.643e-01, -5.246e-02, 5.649e-03, -3.658e-02, -5.173e-02, -2.663e-02, -1.295e-01, 3.044e-02, -1.094e-03));
	r += mul(s7_7, M4(-1.570e-02, 1.167e-01, 2.844e-02, -6.654e-03, 1.367e-01, -2.043e-01, -9.955e-02, -1.525e-01, 6.153e-02, 7.433e-03, -1.944e-02, 1.738e-02, 6.937e-02, -2.045e-02, 1.065e-01, -1.348e-01));
	r += mul(s7_8, M4(-6.352e-03, 3.662e-02, 5.139e-02, 2.546e-02, 6.839e-02, -2.401e-02, -9.333e-02, -1.460e-01, 6.587e-02, 6.411e-02, -6.108e-02, -1.712e-02, -3.972e-02, -4.960e-02, 2.875e-02, 1.562e-02));
	// Constant vector added after all 72 products (presumably the bias term - confirm).
	r += V4(-2.817e-02, -3.278e-02, 5.487e-03, -3.455e-03);
	return r;
}

V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) {
	V4 r = 0.0;
	r += mul(s0_0, M4(8.094e-04, 3.656e-02, 2.530e-02, 1.321e-02, 3.186e-02, 2.905e-02, -3.188e-02, 2.978e-03, 1.053e-01, -6.066e-02, 4.463e-02, -9.486e-02, -5.617e-02, 1.871e-01, 1.274e-01, -8.033e-02));
	r += mul(s0_1, M4(3.014e-02, 1.179e-01, -4.175e-03, -4.586e-02, -6.619e-02, -1.521e-02, -1.647e-02, 9.131e-04, 2.558e-02, 4.775e-02, -5.385e-02, 4.971e-02, -8.648e-02, 7.845e-02, -3.333e-02, -3.736e-02));
	r += mul(s0_2, M4(2.093e-02, 1.867e-03, -5.568e-02, 6.354e-03, -1.903e-02, -1.263e-02, -7.210e-03, -2.144e-03, -3.158e-03, -2.480e-02, 1.009e-01, 2.064e-02, -3.645e-02, -2.622e-02, -5.732e-02, -2.596e-02));
	r += mul(s0_3, M4(-7.684e-02, 1.370e-01, -3.305e-02, -5.904e-03, -4.982e-03, -1.551e-01, 5.245e-02, -2.731e-02, -3.460e-02, 2.563e-02, -1.279e-01, 1.652e-04, -3.821e-01, 1.504e-01, -1.377e-02, -6.622e-02));
	r += mul(s0_4, M4(-1.214e-01, -8.551e-02, -1.371e-01, 3.295e-02, 7.233e-02, 9.524e-02, 1.523e-01, -1.300e-02, -9.606e-03, 1.983e-01, 2.064e-01, -8.872e-02, 4.599e-02, 1.161e-01, 1.250e-01, -1.654e-01));
	r += mul(s0_5, M4(-4.586e-02, -6.165e-02, -6.160e-02, 1.361e-02, 2.826e-02, 4.357e-02, 3.549e-02, 3.897e-02, -5.086e-02, -1.082e-01, 1.085e-01, 1.877e-01, -4.027e-02, -2.616e-02, 4.383e-02, -8.405e-02));
	r += mul(s0_6, M4(-7.785e-02, 4.741e-02, -3.778e-02, -8.051e-02, 1.224e-01, -5.179e-02, -2.464e-02, 6.087e-02, -7.910e-02, 1.355e-01, -6.820e-02, 5.459e-02, -7.689e-02, 7.860e-02, 7.812e-02, -2.411e-02));
	r += mul(s0_7, M4(-1.077e-01, -6.815e-02, -5.486e-02, -2.057e-02, 5.878e-02, 1.066e-01, -5.390e-02, 4.218e-02, -1.593e-01, 6.220e-02, 2.660e-02, 3.090e-02, -1.283e-01, -1.931e-02, -6.371e-02, -2.323e-02));
	r += mul(s0_8, M4(3.539e-02, 2.408e-04, 1.936e-02, -3.251e-02, -1.620e-02, -8.020e-02, 1.042e-01, 6.912e-04, -6.654e-02, 1.033e-01, 2.011e-01, -8.273e-02, -3.157e-02, 2.211e-02, 4.827e-02, 3.244e-02));
	r += mul(s1_0, M4(-2.871e-02, -2.896e-02, -4.957e-02, 9.638e-03, -6.017e-02, 4.270e-01, 1.089e-01, -1.574e-01, 5.912e-02, -8.238e-03, 6.139e-02, -4.290e-02, -6.069e-02, 1.503e-01, 1.930e-02, 3.593e-02));
	r += mul(s1_1, M4(8.551e-02, -1.489e-01, -1.031e-02, -5.748e-02, -1.455e-02, 8.760e-02, -6.351e-02, 2.076e-01, -6.131e-02, 1.324e-02, 1.597e-02, -9.106e-03, -7.278e-02, 3.747e-04, 1.872e-02, 7.650e-02));
	r += mul(s1_2, M4(1.915e-02, -7.556e-02, 3.498e-02, 6.874e-02, 3.128e-02, 2.021e-01, 4.100e-02, 4.759e-02, 2.797e-03, 7.574e-02, -2.713e-02, -6.555e-02, 4.066e-02, 3.684e-03, -6.114e-02, 9.379e-05));
	r += mul(s1_3, M4(-1.447e-02, 2.614e-03, 1.120e-01, -5.897e-02, 5.813e-02, 2.165e-01, 2.444e-01, 8.125e-02, -1.264e-03, -3.225e-02, -1.145e-01, -8.569e-02, -1.534e-01, -1.499e-01, -1.726e-01, 2.016e-01));
	r += mul(s1_4, M4(3.678e-02, -2.782e-01, 1.807e-01, 1.374e-01, 1.275e-01, 1.405e-01, -1.047e-01, 4.539e-02, -1.196e-01, -2.815e-02, 5.395e-02, 3.457e-02, 1.847e-01, -1.763e-01, -9.819e-02, 2.932e-01));
	r += mul(s1_5, M4(-2.290e-02, 5.359e-02, 1.682e-01, -4.842e-02, -2.260e-01, 1.002e-01, 1.145e-02, -2.250e-01, 6.991e-03, 1.070e-01, 1.103e-01, -1.632e-02, 8.423e-02, 1.185e-01, 4.030e-02, 4.863e-03));
	r += mul(s1_6, M4(1.285e-01, 1.951e-01, 9.006e-02, -8.574e-02, 7.778e-02, 2.669e-01, 1.092e-01, -2.875e-02, 2.324e-02, 5.815e-02, -4.646e-02, 2.474e-02, 6.236e-03, -4.835e-02, 2.692e-02, -1.556e-02));
	r += mul(s1_7, M4(6.567e-02, -1.771e-01, 1.636e-01, 1.389e-01, 1.923e-02, -2.261e-02, -1.336e-01, 7.837e-02, 7.038e-02, 7.848e-02, 7.020e-02, -4.635e-02, 2.443e-02, -1.174e-01, -1.740e-02, 2.764e-02));
	r += mul(s1_8, M4(4.308e-02, -1.546e-02, -3.099e-02, 3.656e-02, 1.635e-01, 3.060e-01, 9.050e-02, 1.285e-01, -1.200e-01, -7.059e-02, 7.957e-03, 4.086e-02, 9.437e-02, 5.796e-02, -4.148e-03, 3.257e-02));
	r += mul(s2_0, M4(-2.373e-02, -4.504e-02, -1.705e-03, 5.850e-02, 4.369e-02, -2.675e-02, -1.464e-02, 1.841e-03, -2.994e-02, -8.266e-03, 1.376e-03, -7.074e-02, 5.712e-03, 6.283e-02, -1.036e-02, -4.803e-02));
	r += mul(s2_1, M4(1.495e-02, 1.791e-01, 4.742e-02, -1.452e-01, -1.541e-02, 1.155e-01, 5.500e-02, -1.145e-01, -9.387e-03, 1.789e-02, 3.983e-02, 6.136e-02, -1.055e-01, 1.866e-01, -4.713e-02, -5.747e-02));
	r += mul(s2_2, M4(-1.225e-01, 2.266e-02, 1.181e-02, -7.051e-03, -6.077e-02, 9.735e-02, -7.292e-02, -8.954e-02, 1.563e-02, -2.959e-03, 2.662e-02, -2.103e-02, 8.584e-03, -9.759e-02, -7.741e-02, 3.616e-02));
	r += mul(s2_3, M4(3.635e-02, -1.645e-01, -1.969e-02, 4.328e-03, 4.810e-02, 3.235e-02, -1.383e-01, -2.948e-02, 5.382e-02, -8.886e-02, 8.926e-03, -2.520e-02, 6.535e-02, -7.581e-02, -2.856e-02, 1.286e-02));
	r += mul(s2_4, M4(5.513e-02, 7.227e-02, 2.869e-02, 2.017e-02, 8.521e-02, 2.349e-02, 2.405e-01, -1.228e-01, 2.694e-02, 1.984e-02, 6.508e-02, -5.179e-02, 1.851e-02, 1.414e-01, 4.834e-01, -7.654e-02));
	r += mul(s2_5, M4(-4.136e-02, -6.083e-02, -4.117e-02, 4.112e-03, -6.040e-02, -1.106e-01, 3.498e-02, 4.838e-02, 4.583e-02, 5.135e-02, 3.665e-02, 1.567e-01, 7.413e-02, -1.082e-01, 2.292e-01, 1.199e-01));
	r += mul(s2_6, M4(-5.964e-03, -1.046e-01, 1.439e-01, -3.429e-02, -3.755e-02, 4.449e-02, 8.300e-02, 4.855e-02, 2.292e-02, -4.491e-02, -5.796e-02, 4.857e-02, 5.673e-03, -6.138e-02, 2.884e-02, -6.701e-04));
	r += mul(s2_7, M4(2.151e-02, 1.747e-01, -1.631e-01, 9.152e-02, 5.497e-02, -6.721e-02, -1.185e-02, 6.380e-02, 1.387e-02, 1.475e-01, -2.808e-02, -9.629e-02, 7.208e-02, 1.401e-01, -7.801e-02, 6.167e-02));
	r += mul(s2_8, M4(-4.352e-02, 1.319e-03, 8.084e-02, 3.737e-03, 6.664e-02, 1.447e-01, -1.793e-02, -4.077e-02, -1.716e-02, -6.143e-02, 4.823e-03, 5.037e-02, -9.153e-03, 5.060e-02, 1.001e-01, 4.966e-02));
	r += mul(s3_0, M4(5.649e-02, -3.891e-02, -2.430e-02, -2.174e-02, 1.105e-01, -3.841e-02, 4.498e-02, -5.182e-02, 9.689e-03, -9.495e-02, 5.334e-03, -6.659e-02, 3.015e-02, 4.723e-02, 1.369e-02, -1.516e-02));
	r += mul(s3_1, M4(1.232e-02, -6.351e-02, -1.843e-01, -8.472e-02, 1.921e-02, -6.587e-02, -1.244e-02, -3.887e-02, 2.925e-04, 1.065e-01, 3.573e-02, 4.122e-02, 6.901e-03, -7.812e-02, 7.476e-02, 4.080e-02));
	r += mul(s3_2, M4(-6.150e-03, 3.499e-02, -5.778e-02, -7.025e-02, 2.570e-02, -2.516e-02, -5.620e-02, -2.414e-02, -8.359e-03, -2.838e-02, 8.285e-02, 1.164e-01, -8.725e-03, 2.962e-02, 5.898e-02, -1.734e-02));
	r += mul(s3_3, M4(4.642e-02, 6.787e-02, -1.394e-01, -1.438e-03, 3.154e-02, 2.288e-02, -1.279e-02, 5.279e-02, 7.700e-02, 9.242e-02, 6.436e-02, -1.958e-02, 2.512e-02, -1.689e-01, 1.819e-01, -4.532e-02));
	r += mul(s3_4, M4(-3.873e-02, -1.275e-01, -2.041e-01, -3.506e-02, 2.099e-02, 3.069e-01, 1.333e-01, -2.164e-01, 3.009e-02, -2.383e-02, 1.660e-01, 7.033e-03, -1.980e-01, 1.627e-01, -3.039e-02, -1.680e-01));
	r += mul(s3_5, M4(-1.979e-02, -3.970e-02, 1.013e-01, 1.470e-02, -8.796e-03, -1.388e-01, -2.466e-01, -1.449e-02, -4.928e-02, -1.117e-01, 6.766e-03, 2.252e-01, -8.004e-02, -1.170e-01, 1.027e-02, -4.645e-02));
	r += mul(s3_6, M4(3.630e-02, 1.871e-02, -7.153e-02, -1.659e-02, -2.352e-02, 5.419e-02, 6.725e-02, 5.810e-03, -9.791e-02, -6.364e-03, 1.147e-01, 1.143e-01, 7.626e-02, 6.141e-02, 3.847e-03, 4.719e-02));
	r += mul(s3_7, M4(2.607e-02, 1.518e-02, 4.637e-02, 2.521e-02, -5.243e-02, 2.306e-02, -4.595e-02, -5.553e-02, 2.664e-02, 1.821e-01, 1.098e-01, -1.458e-01, -2.127e-01, -1.272e-02, 6.474e-02, 6.118e-02));
	r += mul(s3_8, M4(2.075e-02, 1.177e-02, -4.928e-02, 1.140e-02, -1.305e-01, -2.648e-03, 1.769e-02, -1.216e-01, 1.441e-01, 3.274e-02, 3.489e-02, -6.273e-02, -7.208e-02, 1.051e-01, 1.537e-01, -1.123e-01));
	r += mul(s4_0, M4(7.110e-02, 4.850e-02, -1.157e-02, 4.185e-02, 5.356e-02, 4.014e-02, 7.733e-02, -1.246e-02, -2.777e-02, -2.533e-02, 2.298e-02, -2.722e-02, -1.042e-01, 3.940e-02, 9.855e-02, -1.004e-01));
	r += mul(s4_1, M4(2.769e-02, 4.162e-02, -1.011e-01, 1.103e-01, -4.562e-02, -2.646e-02, -3.413e-02, 4.516e-02, -5.508e-02, 1.650e-01, 6.129e-02, 1.280e-01, -1.667e-01, 6.874e-02, -1.240e-01, 1.050e-01));
	r += mul(s4_2, M4(2.617e-02, 1.081e-02, 5.938e-02, -2.068e-02, 9.066e-02, 5.606e-02, 1.225e-02, 6.061e-03, -2.554e-03, -1.691e-02, 4.391e-02, 7.897e-03, -3.455e-02, 4.846e-02, -9.722e-02, -4.085e-02));
	r += mul(s4_3, M4(1.545e-01, -1.108e-01, 2.369e-01, 3.505e-02, -6.744e-02, 8.668e-02, -6.337e-02, -2.470e-03, 1.048e-01, 3.657e-03, 6.390e-03, -7.144e-02, -2.276e-02, -1.648e-01, -2.147e-01, -1.581e-03));
	r += mul(s4_4, M4(1.460e-01, 9.660e-03, -1.189e-01, 3.128e-02, 1.497e-01, 5.374e-02, 2.867e-02, 3.194e-02, -1.442e-01, -4.870e-02, 7.521e-04, 2.193e-01, 5.682e-01, -3.875e-01, 4.589e-01, 4.727e-01));
	r += mul(s4_5, M4(2.797e-02, -5.300e-02, -2.445e-01, -5.600e-02, -6.601e-02, -2.028e-02, -3.025e-02, -3.851e-02, 2.073e-02, -2.140e-02, -2.093e-01, 2.266e-01, 8.101e-02, 1.409e-01, 8.547e-02, -8.630e-02));
	r += mul(s4_6, M4(8.292e-02, -7.876e-02, -8.856e-02, 1.907e-02, -6.368e-02, -3.828e-02, -1.131e-01, 4.408e-02, 7.031e-03, -4.064e-02, 6.172e-02, -6.402e-02, -5.491e-02, -4.418e-02, -1.405e-01, -5.717e-02));
	r += mul(s4_7, M4(6.500e-02, -7.290e-03, 2.594e-02, 4.244e-02, 9.845e-02, -1.441e-01, 1.111e-01, -1.345e-02, -4.688e-02, -1.691e-01, 9.880e-02, 4.007e-02, -1.081e-01, 2.336e-01, 1.152e-01, -7.479e-02));
	r += mul(s4_8, M4(1.162e-01, -2.099e-02, 1.145e-01, 1.166e-01, -8.507e-02, 5.079e-02, 7.288e-02, -1.387e-01, 5.778e-02, 6.205e-02, -3.556e-02, 8.295e-02, -8.924e-02, -9.102e-02, -5.964e-02, 8.599e-02));
	r += mul(s5_0, M4(-1.462e-02, 8.082e-02, 4.455e-02, -4.356e-02, 7.335e-02, 4.376e-02, 8.118e-03, -4.540e-03, 1.430e-02, -9.267e-03, -4.582e-02, -8.926e-04, -5.143e-02, 1.560e-02, 2.137e-02, -2.697e-02));
	r += mul(s5_1, M4(-3.726e-02, 1.228e-01, -4.132e-02, 7.470e-02, 1.000e-02, 9.297e-02, 1.480e-01, -1.979e-04, 3.006e-02, -3.010e-02, -7.384e-02, 1.877e-01, 1.923e-02, 1.162e-01, -9.008e-02, 1.230e-01));
	r += mul(s5_2, M4(-2.161e-02, 1.287e-02, -6.257e-02, 1.472e-02, 5.709e-02, -4.043e-02, 1.515e-02, 1.690e-02, 2.494e-02, -1.136e-01, -9.791e-03, -5.449e-02, -3.664e-02, -4.210e-02, -1.805e-02, 4.579e-02));
	r += mul(s5_3, M4(-5.763e-02, 3.082e-02, -1.415e-02, -5.158e-04, -9.545e-02, 4.433e-02, 1.523e-03, -3.887e-03, 8.415e-02, -1.272e-01, 1.091e-01, 2.093e-02, -4.191e-02, -3.525e-02, -1.222e-01, -5.924e-03));
	r += mul(s5_4, M4(-3.762e-02, 1.749e-02, -2.028e-01, -4.117e-02, 1.383e-01, 9.640e-02, -3.933e-02, -1.313e-01, 2.118e-01, -9.010e-02, 2.746e-01, -3.513e-02, 2.214e-01, 5.478e-03, 9.056e-02, 2.379e-01));
	r += mul(s5_5, M4(-5.022e-02, 5.552e-02, -8.665e-03, -7.388e-02, 7.463e-03, -1.085e-01, 1.008e-01, 4.463e-02, 1.378e-01, 3.763e-02, 8.676e-02, 1.877e-01, -1.080e-02, 9.133e-02, 8.161e-02, -1.125e-01));
	r += mul(s5_6, M4(-3.119e-02, 1.863e-02, 1.994e-02, -3.058e-02, -5.287e-02, 1.133e-01, -2.383e-01, 8.179e-03, 6.562e-03, -7.171e-02, -7.081e-02, 3.105e-02, -6.101e-02, 9.925e-02, -8.762e-02, -8.117e-02));
	r += mul(s5_7, M4(3.108e-02, 5.279e-02, 2.264e-02, 6.178e-02, 1.021e-01, 5.481e-02, 1.419e-01, -7.912e-03, 1.020e-02, -2.002e-01, -1.818e-01, 9.782e-02, -5.765e-03, 9.245e-02, 8.338e-02, -8.818e-03));
	r += mul(s5_8, M4(-1.147e-02, -2.966e-02, 5.288e-03, -1.074e-03, 3.207e-02, -2.794e-02, -9.356e-03, -2.497e-02, 9.674e-02, 2.462e-02, -6.490e-02, 9.148e-02, -4.516e-02, -6.141e-02, -4.324e-02, 4.126e-02));
	r += mul(s6_0, M4(1.036e-01, -2.570e-01, -1.208e-01, 8.886e-02, 7.710e-02, -2.474e-02, 2.849e-03, 1.278e-02, -1.482e-02, -1.193e-01, -1.380e-02, 1.007e-02, 1.184e-01, -1.439e-01, -1.044e-01, 2.887e-02));
	r += mul(s6_1, M4(1.209e-01, -3.716e-01, 1.783e-01, 1.966e-01, 1.532e-02, -5.382e-02, -8.952e-02, 8.532e-02, -7.640e-02, 1.069e-01, 1.326e-01, -1.499e-01, -1.033e-02, -2.936e-02, 1.092e-01, 8.362e-02));
	r += mul(s6_2, M4(4.508e-02, -7.873e-02, -2.155e-02, 7.153e-02, 1.640e-02, 6.660e-02, 2.897e-03, 4.817e-03, -3.263e-02, 1.306e-02, -6.786e-02, 8.870e-02, 9.132e-03, 5.999e-02, -1.520e-02, 1.487e-02));
	r += mul(s6_3, M4(1.034e-01, -3.553e-01, 1.452e-01, 8.826e-02, 2.262e-02, 3.700e-02, 8.773e-03, 2.630e-02, -2.575e-02, 8.167e-02, -2.821e-02, -3.883e-02, 1.549e-01, -1.925e-02, 2.490e-01, -6.542e-02));
	r += mul(s6_4, M4(1.189e-01, -2.134e-01, 1.221e-01, 1.048e-01, -1.359e-01, -2.367e-02, -2.231e-01, -2.973e-02, 9.188e-02, -8.502e-02, -1.946e-02, 8.132e-02, 3.279e-01, -1.637e-01, 9.216e-02, 3.257e-02));
	r += mul(s6_5, M4(1.481e-01, -2.576e-01, -3.705e-03, 1.502e-01, 1.999e-02, 1.120e-01, 6.229e-02, -4.918e-02, -4.610e-03, 1.663e-01, -6.736e-02, -2.088e-01, 4.956e-02, 4.480e-02, 2.288e-01, 1.646e-01));
	r += mul(s6_6, M4(4.327e-02, -2.827e-01, 1.014e-01, 1.575e-01, -2.061e-03, 1.758e-01, -1.348e-01, -5.756e-02, -4.617e-02, -7.614e-02, 3.244e-02, -5.471e-02, 6.149e-02, 1.591e-01, -1.890e-01, 3.956e-03));
	r += mul(s6_7, M4(-1.924e-03, -2.328e-01, 7.678e-02, 1.188e-01, -3.185e-02, -1.139e-01, 1.001e-01, 3.155e-02, 1.520e-01, 1.029e-01, -4.720e-02, 8.691e-02, 5.704e-02, -7.140e-02, -2.305e-02, 9.784e-02));
	r += mul(s6_8, M4(3.108e-02, -1.951e-01, 8.722e-02, 7.469e-02, -2.963e-02, 2.462e-02, 3.282e-02, -9.392e-02, 8.362e-02, -9.343e-02, -8.554e-02, -7.126e-03, -6.007e-02, 7.685e-02, 3.067e-02, 7.109e-02));
	r += mul(s7_0, M4(-1.303e-02, 3.881e-02, -5.598e-02, 3.519e-02, -2.655e-02, 1.206e-01, 8.129e-02, -1.125e-01, 3.243e-02, 7.205e-02, 1.959e-02, 4.478e-02, -1.042e-02, -4.264e-03, 3.322e-02, 7.399e-03));
	r += mul(s7_1, M4(2.747e-02, 1.158e-02, -6.186e-02, -1.866e-02, -1.027e-02, -2.844e-02, -7.940e-02, -2.550e-04, 8.304e-03, -8.307e-02, 3.995e-04, 1.069e-02, -4.984e-03, 6.702e-02, 1.038e-01, 2.283e-02));
	r += mul(s7_2, M4(-2.234e-03, 1.159e-02, -5.776e-02, -2.739e-02, 6.097e-05, -3.496e-02, 4.195e-02, 2.330e-02, -2.140e-02, 1.020e-02, 3.447e-03, 1.336e-02, -5.864e-02, 2.480e-02, -2.061e-04, -5.578e-02));
	r += mul(s7_3, M4(-9.866e-03, -1.995e-02, -6.760e-02, 3.330e-02, -2.021e-01, 1.889e-01, -1.641e-01, 7.595e-02, -1.147e-02, -4.509e-02, 5.394e-02, -2.518e-02, 6.504e-02, -4.305e-02, 2.914e-02, -1.353e-01));
	r += mul(s7_4, M4(-2.741e-02, 1.660e-02, 3.526e-02, -1.491e-03, -2.191e-01, 2.912e-01, -4.188e-01, -1.748e-01, 1.531e-01, -1.138e-01, 2.163e-01, 2.023e-01, 8.601e-02, 1.334e-01, 5.447e-02, -6.781e-02));
	r += mul(s7_5, M4(-6.088e-03, -3.201e-02, 2.290e-02, -5.825e-02, -1.473e-02, -6.408e-02, -7.407e-02, 6.456e-02, 1.347e-02, 1.728e-02, 3.563e-02, -1.706e-01, 4.241e-02, 6.153e-02, 4.919e-02, -3.836e-03));
	r += mul(s7_6, M4(7.463e-03, 4.043e-02, 8.435e-02, 3.904e-02, -1.027e-01, -9.643e-02, -3.152e-02, 2.372e-02, 4.518e-02, 1.498e-02, 1.794e-02, -3.230e-02, -1.977e-02, 6.486e-02, -1.300e-01, 2.541e-03));
	r += mul(s7_7, M4(-2.846e-02, 3.876e-02, 6.175e-02, 4.732e-03, -3.786e-02, 9.850e-02, -7.317e-02, 1.009e-01, 8.281e-02, -3.916e-02, -1.173e-01, 4.051e-02, 2.851e-02, 7.268e-02, -1.119e-01, 5.271e-02));
	r += mul(s7_8, M4(-3.044e-02, 3.166e-02, 4.317e-03, -2.729e-03, -8.130e-02, -1.958e-01, 1.340e-01, -1.164e-02, -4.582e-03, -5.562e-02, 3.506e-02, -6.888e-02, -2.856e-02, -5.796e-02, -5.004e-02, -1.432e-02));
	r += V4(-3.570e-03, -4.291e-02, -1.299e-02, 1.425e-02);
	return r;
}

// f3: produces one 4-channel result for the current pixel (Pass7 stores it in t3).
// Each of the 72 V4 arguments is multiplied by its own constant 4x4 weight
// matrix with mul(vector, matrix), the products are accumulated in argument
// order, and a constant 4-component bias is added last.
// Argument naming as supplied by Pass7: sA_B, where B (0..8) indexes the 3x3
// neighbourhood row by row from offset (-1,-1) to (1,1). An even A carries
// max(x, 0) and the following odd A carries -max(-x, 0) of the same value x
// read through l0..l3 (A/2 selects the macro).
// The statements are kept in this exact order: the accumulation runs in
// min16float, so reordering the sum could change rounding.
V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) {
	// Accumulator for the four output channels.
	V4 r = 0.0;
	r += mul(s0_0, M4(-4.462e-02, -1.310e-02, 6.609e-03, 5.740e-02, -8.642e-03, 4.899e-02, -1.583e-02, 2.844e-02, 6.648e-02, 3.871e-03, -1.768e-02, -4.883e-04, -9.543e-02, -1.685e-02, 1.791e-02, -3.466e-02));
	r += mul(s0_1, M4(-6.833e-02, 5.737e-02, 8.637e-02, -7.064e-02, -2.990e-02, 3.789e-03, -1.489e-02, 3.995e-02, -2.039e-02, -6.969e-02, 1.027e-01, 1.380e-01, 7.285e-03, 2.917e-02, 7.228e-02, -1.042e-01));
	r += mul(s0_2, M4(-5.274e-02, 2.310e-02, -6.004e-02, 1.292e-02, -3.580e-02, -8.655e-02, -3.162e-02, 2.008e-02, 6.223e-02, -2.044e-01, -1.197e-01, -4.355e-02, 1.107e-02, -7.214e-02, -3.807e-02, -5.094e-02));
	r += mul(s0_3, M4(1.280e-02, -1.115e-02, 2.182e-04, -2.115e-02, -3.455e-02, -1.242e-02, 2.680e-03, -6.225e-02, -7.121e-02, 1.230e-02, 2.253e-02, -6.083e-02, 4.416e-03, 6.251e-03, -1.459e-01, -1.779e-01));
	r += mul(s0_4, M4(1.331e-01, -1.431e-01, 1.044e-01, 3.952e-02, -1.209e-01, 1.043e-02, -9.951e-02, -8.294e-02, -1.616e-01, 1.760e-01, 7.440e-02, -6.678e-02, -5.565e-02, -9.580e-02, -7.826e-02, -1.852e-01));
	r += mul(s0_5, M4(2.952e-03, 1.791e-03, -1.559e-02, -7.742e-02, -6.616e-03, -7.258e-02, -5.087e-02, -7.048e-03, -7.029e-02, 4.702e-02, 1.028e-02, -8.031e-02, 1.639e-02, 1.458e-02, 3.045e-02, -5.971e-02));
	r += mul(s0_6, M4(-4.983e-02, -3.599e-03, -8.925e-04, -3.772e-02, 1.299e-03, -2.566e-02, -6.971e-03, 9.303e-02, -5.120e-02, -4.997e-02, 1.306e-02, 6.740e-02, 3.890e-02, -1.435e-01, -9.920e-02, 4.651e-02));
	r += mul(s0_7, M4(-4.194e-02, -2.549e-02, 8.896e-03, -5.754e-02, 8.325e-03, 7.072e-02, 5.385e-02, -1.802e-02, 1.931e-02, 2.261e-01, -9.477e-02, -1.303e-01, -1.919e-02, -2.643e-02, 1.063e-01, -2.408e-02));
	r += mul(s0_8, M4(-7.756e-03, 7.950e-03, 1.119e-03, -5.365e-03, -6.542e-03, -2.240e-02, 4.740e-03, -5.067e-02, -8.639e-03, -5.217e-02, -1.758e-02, -5.323e-02, 3.224e-02, -1.480e-02, -4.384e-02, -3.711e-02));
	r += mul(s1_0, M4(-7.298e-02, 1.445e-02, -9.454e-03, -1.058e-02, -8.425e-02, 5.788e-02, 4.315e-02, -4.853e-02, 3.247e-02, 1.929e-02, 1.093e-02, 5.032e-02, -3.654e-02, -9.867e-02, 6.489e-02, -3.972e-03));
	r += mul(s1_1, M4(-1.560e-02, 9.919e-03, 1.003e-02, 7.083e-03, -2.534e-01, -2.473e-01, -4.555e-02, -4.074e-02, 7.089e-02, 1.078e-02, -9.212e-02, -1.034e-01, -6.360e-02, 3.824e-02, -1.433e-02, -7.397e-02));
	r += mul(s1_2, M4(6.288e-02, -1.428e-03, 2.118e-02, 2.259e-04, -2.058e-01, 6.832e-02, -6.487e-02, -8.280e-02, -2.492e-02, -3.735e-02, -7.691e-02, 4.195e-02, -5.090e-02, -2.049e-02, -2.962e-02, 2.124e-02));
	r += mul(s1_3, M4(4.037e-02, -1.183e-01, -3.306e-02, -2.481e-02, -8.280e-02, -1.402e-01, -1.123e-01, -1.309e-01, 1.840e-02, 5.922e-02, 6.474e-02, 9.333e-02, 1.322e-02, 1.328e-01, -7.711e-04, 1.101e-01));
	r += mul(s1_4, M4(2.431e-01, -1.526e-02, 3.973e-01, 6.055e-02, -2.158e-01, 1.573e-01, 1.153e-01, -2.713e-01, 1.282e-01, 1.626e-01, 2.550e-01, 9.282e-02, 1.051e-01, 1.085e-01, 7.336e-02, 2.281e-02));
	r += mul(s1_5, M4(-5.789e-02, -1.408e-01, 2.681e-01, 8.174e-02, -1.873e-01, -1.429e-01, 1.980e-02, -2.227e-02, 1.378e-01, -7.831e-02, -1.198e-01, -5.265e-02, 6.618e-02, 1.066e-01, 2.457e-02, 1.241e-02));
	r += mul(s1_6, M4(-1.809e-02, 3.453e-02, -2.993e-02, -4.992e-02, 3.207e-02, -4.045e-02, -1.958e-03, -3.240e-02, -3.410e-02, 3.105e-02, -1.371e-01, 1.361e-01, 3.359e-02, -2.561e-02, -5.956e-03, -1.423e-02));
	r += mul(s1_7, M4(-1.847e-02, 1.268e-01, 2.167e-01, -4.562e-04, 8.563e-02, -1.685e-01, -6.241e-02, -2.425e-01, 1.773e-01, 1.288e-02, 6.724e-02, 1.295e-01, -3.540e-02, -1.026e-01, 7.806e-02, 1.190e-01));
	r += mul(s1_8, M4(5.638e-02, 9.204e-02, 4.834e-02, -3.144e-02, 1.102e-01, -1.814e-02, 1.203e-01, 1.756e-02, 2.416e-03, -2.827e-02, 1.459e-01, 1.547e-01, 3.526e-02, 1.291e-02, 8.043e-02, 3.777e-02));
	r += mul(s2_0, M4(1.361e-01, 3.691e-02, 4.395e-02, 1.606e-01, 7.802e-03, -1.292e-01, -1.353e-01, 3.487e-02, 1.666e-02, 6.090e-03, 1.957e-02, 1.009e-02, -2.801e-02, 7.073e-03, -6.974e-03, -1.360e-01));
	r += mul(s2_1, M4(4.295e-02, 1.758e-02, -1.065e-01, 1.572e-01, 3.012e-02, 6.403e-02, -6.217e-02, -2.477e-02, 5.268e-02, -4.278e-02, -3.036e-02, 3.319e-02, -3.211e-02, 6.374e-02, -8.323e-02, -5.754e-02));
	r += mul(s2_2, M4(-3.358e-03, -2.677e-02, -5.062e-03, -8.726e-02, -1.376e-01, -3.560e-02, -6.401e-02, 6.773e-02, 2.704e-02, 3.601e-02, 1.257e-02, 3.423e-02, 1.469e-01, -1.546e-01, -1.254e-01, -1.696e-02));
	r += mul(s2_3, M4(5.545e-02, 4.398e-02, -2.672e-02, 1.246e-01, 3.295e-02, 8.615e-02, -4.438e-03, 1.501e-02, -4.680e-02, 2.229e-02, 1.763e-02, 5.402e-02, 5.715e-02, -5.796e-03, -3.928e-02, 7.037e-02));
	r += mul(s2_4, M4(2.797e-02, -1.276e-01, 1.149e-01, 4.661e-02, 1.573e-01, 1.898e-01, 2.875e-01, 2.349e-01, 1.526e-01, -1.870e-01, 4.934e-02, 2.432e-01, 6.115e-02, -2.159e-03, 5.827e-02, -1.524e-01));
	r += mul(s2_5, M4(6.111e-02, -2.927e-02, -1.449e-01, 7.417e-02, 4.468e-02, 2.148e-02, 4.834e-02, -7.180e-02, 6.146e-02, 5.945e-02, 2.432e-02, 2.849e-02, 1.800e-01, 8.602e-02, 1.058e-01, -1.170e-01));
	r += mul(s2_6, M4(5.160e-02, 7.842e-03, -3.424e-02, 8.373e-02, -4.979e-02, -1.220e-01, -1.123e-01, 4.353e-03, 4.925e-02, 6.592e-02, -6.178e-04, -1.969e-02, -1.097e-03, 3.600e-03, 3.540e-02, 4.032e-03));
	r += mul(s2_7, M4(4.263e-02, 5.162e-02, -4.264e-02, 1.454e-02, -4.603e-02, -2.586e-02, -4.238e-02, 7.478e-02, -3.296e-02, -2.916e-02, -3.310e-02, 8.334e-02, -1.462e-02, 2.845e-02, -1.949e-02, -8.869e-02));
	r += mul(s2_8, M4(7.671e-03, -1.960e-02, 8.305e-03, 3.879e-03, 1.967e-02, 2.672e-02, -1.089e-02, 1.420e-02, 7.171e-03, -2.250e-02, 5.168e-02, 1.100e-01, -6.511e-02, -1.224e-02, -1.919e-01, -5.954e-02));
	r += mul(s3_0, M4(-5.489e-02, -1.202e-01, -6.728e-02, -8.068e-02, 7.251e-02, 5.867e-02, -6.234e-02, 1.155e-01, 5.125e-02, -5.765e-02, -5.751e-02, -1.635e-02, 1.864e-02, 6.609e-02, -1.105e-03, -4.950e-02));
	r += mul(s3_1, M4(-1.404e-01, -4.070e-02, -2.387e-01, 6.615e-03, -4.551e-02, 1.071e-01, 5.698e-02, 9.160e-02, 8.717e-02, -7.866e-02, 3.544e-02, 1.517e-01, -2.900e-02, -1.239e-02, -5.162e-02, 1.900e-02));
	r += mul(s3_2, M4(-6.847e-02, -1.020e-01, -7.745e-02, 3.094e-02, -4.354e-02, -6.276e-02, -3.621e-02, -6.242e-02, 5.462e-02, -1.308e-01, -1.210e-01, -2.065e-02, -4.381e-02, -5.895e-02, 8.102e-02, -1.266e-01));
	r += mul(s3_3, M4(-1.059e-04, -1.497e-01, -1.039e-02, -4.629e-02, 2.626e-02, 1.801e-02, -9.512e-04, 6.084e-02, 4.065e-02, 9.308e-02, -2.085e-02, -2.430e-03, 1.454e-02, -5.680e-03, -1.196e-01, -5.078e-02));
	r += mul(s3_4, M4(-8.717e-02, -1.156e-01, -4.551e-02, -1.233e-01, -3.711e-02, -9.569e-02, 7.082e-02, -9.574e-02, 3.863e-01, 1.752e-02, 7.915e-02, 4.365e-01, -6.295e-02, -1.045e-02, 4.266e-02, -2.812e-01));
	r += mul(s3_5, M4(-1.277e-03, -8.856e-02, -1.335e-02, 8.308e-02, -1.264e-01, 8.471e-02, -9.759e-02, -7.361e-02, 1.505e-01, 2.235e-01, 1.617e-01, 1.516e-02, -2.184e-02, 3.152e-02, -2.481e-01, -1.529e-01));
	r += mul(s3_6, M4(-6.069e-02, 7.997e-02, -2.999e-02, 6.667e-02, -3.571e-02, -8.018e-02, -4.923e-02, -2.775e-02, 2.642e-02, -7.642e-02, -3.110e-02, -8.301e-02, 3.597e-02, -6.190e-02, 5.578e-02, 1.650e-02));
	r += mul(s3_7, M4(1.950e-03, -5.827e-02, -1.350e-01, -3.964e-02, 2.067e-02, 8.880e-02, -5.618e-02, -9.141e-02, 6.710e-02, 1.227e-02, -1.851e-01, -1.946e-01, 4.703e-02, -4.175e-03, 1.142e-01, -2.030e-01));
	r += mul(s3_8, M4(-4.808e-02, -8.059e-02, -1.219e-01, -1.497e-01, 5.362e-02, -6.077e-02, -7.402e-02, -9.153e-02, 1.003e-01, -4.803e-03, -7.885e-02, -2.247e-02, 1.434e-02, -4.348e-02, 6.374e-02, -1.095e-01));
	r += mul(s4_0, M4(8.929e-02, 1.477e-01, 1.770e-01, 1.408e-01, 6.920e-03, 5.607e-02, -1.516e-02, -5.035e-02, 2.399e-02, -4.023e-02, -4.029e-02, -6.642e-02, -2.567e-02, -4.240e-02, -4.215e-02, -3.904e-01));
	r += mul(s4_1, M4(2.250e-02, -3.173e-03, 1.788e-02, 1.658e-01, 5.531e-02, -4.699e-02, -2.140e-02, -1.022e-01, -1.031e-02, -2.320e-02, 6.078e-02, -1.768e-02, 4.480e-01, -3.266e-01, -6.451e-02, -1.013e-01));
	r += mul(s4_2, M4(5.465e-02, 8.356e-02, 2.870e-04, -4.076e-02, 6.278e-02, 7.096e-03, -2.026e-02, -2.613e-02, -4.095e-03, -8.923e-02, 1.429e-02, 1.257e-02, -2.308e-02, -4.313e-02, -8.777e-02, 2.147e-02));
	r += mul(s4_3, M4(1.128e-02, -8.824e-02, 3.743e-02, 1.121e-01, 3.255e-02, 6.373e-03, 3.494e-02, 5.817e-02, -5.255e-02, 6.612e-02, -2.657e-02, 9.935e-02, -5.839e-02, 1.502e-03, 1.674e-02, -2.483e-01));
	r += mul(s4_4, M4(-3.301e-03, 1.381e-03, 2.348e-01, -7.932e-02, 7.121e-02, 1.894e-02, 5.626e-02, 1.242e-01, 3.424e-02, 2.703e-01, 9.893e-02, 9.129e-02, 3.413e-02, 3.306e-01, -6.402e-02, 1.710e-01));
	r += mul(s4_5, M4(1.190e-03, -1.362e-01, 6.616e-02, 7.430e-02, -1.908e-02, -7.131e-02, -6.680e-02, -1.412e-02, -1.783e-03, 1.154e-01, -5.414e-02, -1.866e-01, -6.085e-02, -8.559e-02, 1.169e-01, -4.633e-02));
	r += mul(s4_6, M4(-4.513e-03, 3.699e-02, 9.614e-02, 6.584e-02, -4.287e-02, -9.917e-02, -2.837e-02, -1.147e-01, 5.015e-02, 8.579e-03, 4.115e-02, 9.666e-02, 4.970e-02, 3.432e-02, 1.172e-01, -1.503e-01));
	r += mul(s4_7, M4(4.660e-02, 5.717e-02, 1.118e-02, -1.337e-02, 1.256e-02, -4.309e-02, 8.529e-02, -4.747e-02, -5.440e-02, -1.774e-02, 1.103e-01, 4.132e-02, -3.581e-02, 9.867e-02, 2.425e-02, -1.120e-02));
	r += mul(s4_8, M4(5.126e-02, 3.103e-02, 1.489e-01, 8.006e-02, -3.423e-02, -6.998e-02, 6.039e-02, -2.939e-02, -1.505e-02, 7.190e-03, -4.863e-02, -2.403e-02, -6.097e-02, -8.975e-02, -3.179e-02, -8.617e-02));
	r += mul(s5_0, M4(-8.262e-02, 2.481e-02, 4.836e-03, -4.425e-02, -2.515e-02, 5.051e-02, -6.125e-03, -5.547e-02, 6.078e-02, 5.525e-02, 9.176e-02, 1.572e-02, -1.979e-02, 5.908e-02, 9.396e-02, 5.186e-02));
	r += mul(s5_1, M4(-4.726e-02, 3.977e-02, -1.014e-01, -1.503e-02, 3.785e-02, -1.439e-02, 1.512e-02, -3.868e-02, 5.295e-02, -8.620e-02, 8.191e-02, 6.278e-02, 1.169e-01, -1.905e-01, 7.085e-02, 1.050e-01));
	r += mul(s5_2, M4(-2.736e-02, 6.220e-02, -5.548e-02, 6.147e-02, 1.232e-01, 1.163e-02, 1.780e-02, -7.604e-02, 1.381e-01, 6.557e-03, 8.610e-03, 6.522e-03, 2.147e-02, 1.777e-02, 1.541e-02, 1.560e-02));
	r += mul(s5_3, M4(5.259e-02, -3.546e-02, 6.230e-02, 6.797e-02, 7.732e-02, 8.863e-03, -1.746e-02, -3.139e-02, -5.820e-02, 2.022e-02, -1.225e-01, -3.615e-02, -4.625e-02, -2.697e-02, -2.579e-02, 1.510e-01));
	r += mul(s5_4, M4(-2.816e-02, -9.628e-02, 7.526e-02, -5.355e-02, -1.576e-01, -2.085e-01, -3.468e-01, -2.840e-02, -1.601e-02, 1.563e-01, 4.990e-02, -7.243e-02, -1.961e-01, 1.467e-01, 7.937e-02, 4.030e-02));
	r += mul(s5_5, M4(-3.320e-03, -1.204e-01, -5.233e-02, -5.508e-02, 1.663e-01, 1.481e-01, 2.805e-02, -1.490e-01, 1.753e-01, 2.295e-01, 2.995e-01, -5.415e-02, -9.904e-02, -8.550e-02, -1.067e-01, 2.813e-02));
	r += mul(s5_6, M4(-1.444e-02, 1.265e-02, 3.572e-02, 9.024e-03, -5.852e-02, -7.202e-02, -1.086e-01, -1.721e-01, 3.204e-02, 8.423e-03, 4.847e-02, 3.032e-02, 7.523e-03, 2.191e-02, 8.512e-03, -5.825e-02));
	r += mul(s5_7, M4(4.656e-02, 4.840e-02, -3.721e-02, -5.674e-02, 3.699e-02, -7.553e-02, -2.514e-02, 3.656e-02, -3.952e-02, 4.498e-03, 1.271e-03, 2.396e-01, -2.891e-02, 3.648e-02, 3.780e-03, 8.656e-03));
	r += mul(s5_8, M4(5.193e-02, 5.043e-02, 3.167e-02, 1.211e-02, -4.968e-02, -7.751e-02, -5.472e-02, -1.230e-01, -8.041e-02, 7.367e-03, 3.397e-02, -6.051e-02, 1.718e-02, -1.775e-02, -3.377e-02, 2.839e-03));
	r += mul(s6_0, M4(2.356e-01, 2.417e-01, -7.312e-03, 2.296e-01, 1.123e-02, 2.061e-02, 1.249e-01, 1.465e-01, -8.119e-02, -8.273e-02, 2.396e-02, -5.723e-02, -1.368e-03, -8.216e-02, -7.126e-02, 1.767e-01));
	r += mul(s6_1, M4(3.064e-01, 1.425e-01, 1.339e-01, 1.613e-01, 5.711e-02, 1.105e-01, 3.318e-02, -1.383e-02, -1.848e-01, 7.377e-02, -1.934e-01, -1.845e-01, 2.550e-02, 9.450e-02, 4.248e-02, 1.391e-02));
	r += mul(s6_2, M4(2.085e-01, 1.108e-01, 1.701e-01, 2.713e-02, 4.024e-02, 1.521e-02, 7.399e-02, 4.808e-02, -6.747e-02, 5.166e-02, 5.017e-02, 6.361e-02, -2.552e-02, -6.003e-02, 4.130e-04, 5.567e-02));
	r += mul(s6_3, M4(2.763e-01, 2.114e-01, 4.282e-02, 1.008e-01, 2.514e-02, 6.967e-03, 7.642e-02, 7.630e-02, 2.342e-02, -5.306e-02, 1.487e-02, -1.732e-01, 3.900e-02, -1.407e-01, -1.259e-01, 1.265e-01));
	r += mul(s6_4, M4(1.623e-01, -1.235e-02, 2.044e-01, 5.010e-02, 2.684e-01, -5.874e-02, 4.729e-02, -2.150e-03, -8.674e-02, -2.515e-03, 1.375e-02, 1.261e-01, 1.919e-01, -1.287e-01, 2.525e-01, 4.040e-01));
	r += mul(s6_5, M4(1.842e-01, 1.762e-01, 1.343e-01, 1.453e-01, -4.476e-02, -1.114e-02, -2.291e-02, 2.730e-02, -1.311e-01, -1.116e-01, -1.276e-01, 1.305e-01, 6.493e-02, 8.502e-02, 9.360e-02, -1.174e-02));
	r += mul(s6_6, M4(1.881e-01, 9.549e-02, -2.598e-02, 1.182e-01, 2.119e-03, 4.513e-02, 3.322e-02, -5.448e-02, -6.380e-03, -1.790e-02, -1.076e-02, -7.405e-02, -4.231e-02, 1.375e-02, -2.372e-02, 9.198e-02));
	r += mul(s6_7, M4(8.190e-02, -4.095e-02, 3.436e-02, 2.693e-01, 9.724e-03, -8.337e-02, 4.002e-04, -1.853e-01, -4.698e-02, -4.704e-03, -1.762e-01, 1.410e-02, 5.535e-02, -4.814e-02, -2.937e-02, 1.699e-01));
	r += mul(s6_8, M4(7.594e-02, 2.500e-02, 2.054e-01, 1.359e-01, -6.950e-03, -3.872e-02, -8.553e-02, 3.117e-02, 8.458e-02, 1.037e-01, 9.023e-03, 6.317e-02, -5.330e-02, -1.181e-02, -1.154e-02, 1.801e-02));
	r += mul(s7_0, M4(3.500e-02, -1.354e-02, -6.275e-02, 2.530e-02, -1.126e-01, -2.442e-02, 6.522e-02, -4.946e-02, -3.849e-02, -1.052e-03, 7.946e-02, 8.609e-02, 8.536e-02, -8.888e-02, -1.648e-02, -9.296e-03));
	r += mul(s7_1, M4(1.536e-02, -1.095e-01, -9.734e-03, 1.057e-01, -3.581e-02, -1.352e-01, -6.353e-02, 6.288e-02, 7.990e-02, -1.541e-02, -2.616e-02, -2.092e-02, -1.386e-01, 3.706e-02, 9.455e-02, -8.811e-02));
	r += mul(s7_2, M4(-1.917e-02, 3.680e-03, -9.103e-02, 6.932e-03, 1.862e-02, 5.211e-02, 4.197e-02, -2.273e-02, -4.925e-03, 5.122e-02, 6.613e-02, 4.453e-02, -1.724e-02, -3.789e-02, -5.483e-03, -6.117e-02));
	r += mul(s7_3, M4(6.494e-02, 5.085e-02, -3.084e-02, -4.360e-02, -3.047e-04, 1.334e-01, 1.050e-01, -5.439e-02, 3.051e-02, 5.960e-02, 8.870e-02, -2.083e-02, 6.208e-02, -2.183e-02, -5.802e-02, 6.238e-02));
	r += mul(s7_4, M4(7.910e-02, 3.215e-02, -5.218e-02, -2.821e-02, 1.271e-01, -2.276e-01, -1.860e-01, -1.064e-01, 2.585e-02, 3.594e-02, 2.412e-01, 2.176e-01, 1.651e-01, -1.164e-01, 4.035e-01, -8.403e-02));
	r += mul(s7_5, M4(3.154e-02, -6.521e-02, -3.050e-02, 6.594e-02, 1.437e-01, 1.430e-02, -1.211e-01, -3.589e-02, 6.985e-02, 6.226e-02, 3.090e-02, 1.198e-01, -6.754e-03, -1.157e-02, -1.102e-01, -1.356e-01));
	r += mul(s7_6, M4(2.194e-02, -1.653e-02, -4.073e-02, -1.260e-02, 3.354e-02, 3.995e-02, -3.034e-02, -9.874e-02, -2.932e-02, 2.086e-02, -1.687e-02, 5.243e-02, 2.606e-02, 2.746e-02, -2.158e-02, -3.860e-02));
	r += mul(s7_7, M4(2.909e-02, -1.935e-02, -9.481e-02, -4.910e-02, -9.199e-02, 1.076e-02, 9.528e-03, 1.829e-01, 1.451e-03, 2.201e-03, -2.104e-02, 1.605e-01, -8.880e-02, -1.363e-01, -9.424e-02, -2.646e-02));
	r += mul(s7_8, M4(-1.473e-02, -5.392e-02, -7.442e-02, -2.511e-02, -1.129e-02, -6.392e-02, 1.128e-01, 6.659e-02, -1.281e-02, -1.358e-02, -2.367e-02, 7.697e-02, 2.667e-03, -4.043e-02, -8.640e-02, -6.715e-02));
	// Constant per-channel bias, added after all weighted inputs.
	r += V4(-2.098e-02, 7.151e-03, -1.746e-02, -5.302e-02);
	return r;
}

// Pass7: evaluates this pass for a single pixel and stores the four V4
// results of f0..f3 into t0..t3 at that pixel.
//   blockStart - offset added to the remapped thread coordinate
//   tid        - thread id; only tid.x is used, remapped through Rmp8x8
// Threads whose pixel lies outside GetInputSize() return without writing.
void Pass7(uint2 blockStart, uint3 tid) {
	// NOTE: `pt` and `pos` must keep these names; the sampling macros
	// (see O in the COMMON section) presumably expand to expressions using them.
	float2 pt = float2(GetInputPt());
	uint2 px = blockStart + Rmp8x8(tid.x);
	uint2 dims = GetInputSize();
	if (!(px.x < dims.x && px.y < dims.y)) {
		return;
	}
	float2 pos = (px + 0.5) * pt;

	// For every tap the sampled value x is split into a positive part
	// max(x, 0) (even-numbered s*) and a negative part -max(-x, 0)
	// (odd-numbered s*). The negative part is taken before the sample is
	// clamped in place.

	// l0 taps -> s0_* (positive part), s1_* (negative part)
	V4 s0_0 = l0(-1.0, -1.0);
	V4 s1_0 = -max(-s0_0, 0.0);
	s0_0 = max(s0_0, 0.0);
	V4 s0_1 = l0(0.0, -1.0);
	V4 s1_1 = -max(-s0_1, 0.0);
	s0_1 = max(s0_1, 0.0);
	V4 s0_2 = l0(1.0, -1.0);
	V4 s1_2 = -max(-s0_2, 0.0);
	s0_2 = max(s0_2, 0.0);
	V4 s0_3 = l0(-1.0, 0.0);
	V4 s1_3 = -max(-s0_3, 0.0);
	s0_3 = max(s0_3, 0.0);
	V4 s0_4 = l0(0.0, 0.0);
	V4 s1_4 = -max(-s0_4, 0.0);
	s0_4 = max(s0_4, 0.0);
	V4 s0_5 = l0(1.0, 0.0);
	V4 s1_5 = -max(-s0_5, 0.0);
	s0_5 = max(s0_5, 0.0);
	V4 s0_6 = l0(-1.0, 1.0);
	V4 s1_6 = -max(-s0_6, 0.0);
	s0_6 = max(s0_6, 0.0);
	V4 s0_7 = l0(0.0, 1.0);
	V4 s1_7 = -max(-s0_7, 0.0);
	s0_7 = max(s0_7, 0.0);
	V4 s0_8 = l0(1.0, 1.0);
	V4 s1_8 = -max(-s0_8, 0.0);
	s0_8 = max(s0_8, 0.0);

	// l1 taps -> s2_* (positive part), s3_* (negative part)
	V4 s2_0 = l1(-1.0, -1.0);
	V4 s3_0 = -max(-s2_0, 0.0);
	s2_0 = max(s2_0, 0.0);
	V4 s2_1 = l1(0.0, -1.0);
	V4 s3_1 = -max(-s2_1, 0.0);
	s2_1 = max(s2_1, 0.0);
	V4 s2_2 = l1(1.0, -1.0);
	V4 s3_2 = -max(-s2_2, 0.0);
	s2_2 = max(s2_2, 0.0);
	V4 s2_3 = l1(-1.0, 0.0);
	V4 s3_3 = -max(-s2_3, 0.0);
	s2_3 = max(s2_3, 0.0);
	V4 s2_4 = l1(0.0, 0.0);
	V4 s3_4 = -max(-s2_4, 0.0);
	s2_4 = max(s2_4, 0.0);
	V4 s2_5 = l1(1.0, 0.0);
	V4 s3_5 = -max(-s2_5, 0.0);
	s2_5 = max(s2_5, 0.0);
	V4 s2_6 = l1(-1.0, 1.0);
	V4 s3_6 = -max(-s2_6, 0.0);
	s2_6 = max(s2_6, 0.0);
	V4 s2_7 = l1(0.0, 1.0);
	V4 s3_7 = -max(-s2_7, 0.0);
	s2_7 = max(s2_7, 0.0);
	V4 s2_8 = l1(1.0, 1.0);
	V4 s3_8 = -max(-s2_8, 0.0);
	s2_8 = max(s2_8, 0.0);

	// l2 taps -> s4_* (positive part), s5_* (negative part)
	V4 s4_0 = l2(-1.0, -1.0);
	V4 s5_0 = -max(-s4_0, 0.0);
	s4_0 = max(s4_0, 0.0);
	V4 s4_1 = l2(0.0, -1.0);
	V4 s5_1 = -max(-s4_1, 0.0);
	s4_1 = max(s4_1, 0.0);
	V4 s4_2 = l2(1.0, -1.0);
	V4 s5_2 = -max(-s4_2, 0.0);
	s4_2 = max(s4_2, 0.0);
	V4 s4_3 = l2(-1.0, 0.0);
	V4 s5_3 = -max(-s4_3, 0.0);
	s4_3 = max(s4_3, 0.0);
	V4 s4_4 = l2(0.0, 0.0);
	V4 s5_4 = -max(-s4_4, 0.0);
	s4_4 = max(s4_4, 0.0);
	V4 s4_5 = l2(1.0, 0.0);
	V4 s5_5 = -max(-s4_5, 0.0);
	s4_5 = max(s4_5, 0.0);
	V4 s4_6 = l2(-1.0, 1.0);
	V4 s5_6 = -max(-s4_6, 0.0);
	s4_6 = max(s4_6, 0.0);
	V4 s4_7 = l2(0.0, 1.0);
	V4 s5_7 = -max(-s4_7, 0.0);
	s4_7 = max(s4_7, 0.0);
	V4 s4_8 = l2(1.0, 1.0);
	V4 s5_8 = -max(-s4_8, 0.0);
	s4_8 = max(s4_8, 0.0);

	// l3 taps -> s6_* (positive part), s7_* (negative part)
	V4 s6_0 = l3(-1.0, -1.0);
	V4 s7_0 = -max(-s6_0, 0.0);
	s6_0 = max(s6_0, 0.0);
	V4 s6_1 = l3(0.0, -1.0);
	V4 s7_1 = -max(-s6_1, 0.0);
	s6_1 = max(s6_1, 0.0);
	V4 s6_2 = l3(1.0, -1.0);
	V4 s7_2 = -max(-s6_2, 0.0);
	s6_2 = max(s6_2, 0.0);
	V4 s6_3 = l3(-1.0, 0.0);
	V4 s7_3 = -max(-s6_3, 0.0);
	s6_3 = max(s6_3, 0.0);
	V4 s6_4 = l3(0.0, 0.0);
	V4 s7_4 = -max(-s6_4, 0.0);
	s6_4 = max(s6_4, 0.0);
	V4 s6_5 = l3(1.0, 0.0);
	V4 s7_5 = -max(-s6_5, 0.0);
	s6_5 = max(s6_5, 0.0);
	V4 s6_6 = l3(-1.0, 1.0);
	V4 s7_6 = -max(-s6_6, 0.0);
	s6_6 = max(s6_6, 0.0);
	V4 s6_7 = l3(0.0, 1.0);
	V4 s7_7 = -max(-s6_7, 0.0);
	s6_7 = max(s6_7, 0.0);
	V4 s6_8 = l3(1.0, 1.0);
	V4 s7_8 = -max(-s6_8, 0.0);
	s6_8 = max(s6_8, 0.0);

	// Each fN receives the full set of 72 inputs and yields one V4 output.
	t0[px] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8);
	t1[px] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8);
	t2[px] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8);
	t3[px] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8);
}

//!PASS 8
//!DESC conv7
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN t0, t1, t2, t3
//!OUT t4, t5, t6, t7

// Input accessors for this pass: lN(x, y) reads texture tN through the O macro
// (point-sampled at pos + offset * pt) at the texel offset (x, y) and converts
// the result to V4. The names l0..l3 are reused per pass; pass 1, for example,
// defines l0 to read INPUT instead.
#define l0(x, y) V4(O(t0, float2(x, y)))
#define l1(x, y) V4(O(t1, float2(x, y)))
#define l2(x, y) V4(O(t2, float2(x, y)))
#define l3(x, y) V4(O(t3, float2(x, y)))

// f0: computes one V4 (four channels) as a sum of 72 vector-matrix products
// plus a constant V4.
//
// Parameters: sK_J for K = 0..7 and J = 0..8, each a V4. Each one is multiplied
// by its own constant 4x4 matrix via mul(vector, matrix) and added to the
// accumulator in the order listed.
// NOTE(review): presumably J indexes the 9 taps of a 3x3 neighbourhood and K
// indexes 8 input feature groups; the code that fills these arguments is
// outside this view, so confirm against the pass entry function.
//
// Returns: the accumulated V4. No clamping or activation is applied here.
// NOTE(review): generated weights; do not reorder the terms, since the
// accumulation is done in min16float and order can affect rounding.
V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) {
	// Accumulator, starts at zero in all four channels.
	V4 r = 0.0;
	r += mul(s0_0, M4(2.429e-02, -2.332e-02, 4.314e-02, 1.064e-02, 2.172e-03, -9.378e-03, -6.456e-02, 5.519e-02, -1.450e-01, 3.668e-02, 5.025e-02, 1.031e-01, 1.049e-02, 1.149e-02, -3.782e-02, -6.485e-02));
	r += mul(s0_1, M4(-1.216e-02, 1.289e-01, 4.855e-03, -5.725e-03, 5.489e-02, -1.701e-02, 9.362e-04, 2.121e-02, -4.344e-02, -1.216e-01, 7.830e-02, 1.137e-01, 2.323e-02, -2.009e-02, -4.112e-02, -6.864e-02));
	r += mul(s0_2, M4(-4.899e-02, -3.241e-02, -3.536e-03, 5.684e-02, -2.514e-02, -3.484e-02, -3.530e-02, 9.876e-03, 5.883e-02, 2.769e-02, 4.048e-02, -3.935e-02, -2.423e-02, -9.510e-03, 3.234e-02, 1.199e-02));
	r += mul(s0_3, M4(-1.349e-01, -1.353e-01, 6.827e-03, -5.336e-02, -6.016e-02, -1.109e-02, 9.085e-02, -5.200e-02, -1.802e-01, -2.713e-02, 4.700e-02, 4.469e-02, -7.109e-03, -5.905e-02, -7.286e-02, -1.176e-01));
	r += mul(s0_4, M4(-1.088e-01, 2.120e-01, -9.531e-02, -7.526e-02, -3.024e-02, -8.052e-02, 7.018e-02, -1.512e-01, -8.795e-02, -9.058e-02, 1.183e-02, 8.818e-02, -2.706e-01, 5.783e-02, -6.422e-02, -7.333e-02));
	r += mul(s0_5, M4(-1.192e-02, -8.800e-02, 2.284e-02, 4.938e-02, 2.447e-02, 1.127e-01, 2.742e-02, -1.041e-01, -6.344e-03, 5.594e-03, -1.393e-01, -1.855e-02, -1.230e-01, 2.002e-02, -9.026e-02, -7.525e-04));
	r += mul(s0_6, M4(-2.280e-02, -7.241e-02, 2.928e-02, 1.022e-02, -5.593e-02, 2.254e-02, 6.370e-02, 1.904e-02, -8.088e-02, -4.722e-02, 5.025e-02, 6.547e-02, 1.310e-01, -4.054e-02, 1.521e-02, 7.518e-04));
	r += mul(s0_7, M4(1.161e-01, -6.198e-03, 8.148e-02, 1.159e-02, 1.194e-01, -4.006e-04, 7.621e-02, 6.951e-03, 2.882e-03, -2.765e-02, 4.471e-02, 8.374e-02, -1.463e-01, 2.266e-01, -2.111e-01, 8.026e-03));
	r += mul(s0_8, M4(-5.146e-02, -5.043e-02, -4.174e-04, 8.790e-03, 7.446e-02, -3.545e-02, 1.583e-02, 9.433e-03, -5.614e-02, -2.652e-02, 1.538e-02, 6.623e-02, -2.130e-02, -1.283e-01, -1.310e-02, -3.202e-02));
	r += mul(s1_0, M4(-6.381e-02, 3.036e-02, 5.632e-02, 6.065e-03, -7.098e-02, -4.316e-02, 3.370e-02, -9.274e-02, 1.666e-03, -3.818e-03, 3.570e-02, 2.863e-02, -3.719e-03, 1.548e-02, 8.114e-04, -2.087e-02));
	r += mul(s1_1, M4(-1.125e-01, 2.095e-01, -5.373e-03, 5.728e-02, -1.684e-02, 5.683e-02, 6.455e-02, 8.118e-02, -2.147e-02, -6.171e-02, 5.331e-02, -2.312e-02, 5.723e-02, -7.003e-02, -5.061e-02, -6.937e-02));
	r += mul(s1_2, M4(-3.802e-02, -1.043e-02, 4.402e-02, 5.884e-03, -6.816e-02, 2.554e-02, -6.462e-02, 3.692e-02, 2.855e-02, -7.795e-02, -1.158e-01, 9.489e-03, -3.566e-02, -3.169e-02, 1.226e-02, -1.260e-02));
	r += mul(s1_3, M4(-7.747e-02, -1.154e-03, -1.686e-01, -6.604e-02, -6.979e-02, 1.844e-02, -1.876e-01, 1.099e-01, -1.142e-01, -2.556e-02, 4.666e-02, 5.026e-02, -1.906e-02, -4.265e-02, 4.301e-02, -8.881e-02));
	r += mul(s1_4, M4(1.986e-03, -3.458e-01, 3.094e-01, -1.274e-01, 2.375e-01, -7.486e-02, -1.912e-01, 4.443e-02, 6.306e-02, 8.115e-02, -5.890e-02, 2.617e-03, -3.320e-02, -8.362e-02, -1.985e-03, -1.438e-01));
	r += mul(s1_5, M4(2.581e-02, 1.221e-02, 2.112e-02, 5.072e-02, -6.900e-02, 4.629e-02, -5.685e-02, 7.048e-02, -4.532e-02, -3.919e-02, 9.927e-02, -3.342e-02, -3.581e-02, 5.119e-02, -1.537e-01, 1.891e-02));
	r += mul(s1_6, M4(5.745e-03, 4.427e-02, 5.853e-02, 3.445e-02, 1.085e-01, -1.109e-01, 5.323e-02, 1.128e-01, -5.432e-02, 7.260e-02, -1.084e-01, 8.378e-02, 8.080e-02, -6.499e-02, 1.355e-01, 4.337e-02));
	r += mul(s1_7, M4(8.341e-02, 3.424e-02, 3.290e-02, -1.363e-02, 1.580e-01, 3.006e-01, 2.149e-03, 2.191e-01, 8.049e-02, 1.118e-02, 8.779e-03, 6.471e-02, -1.069e-01, -2.980e-02, -4.201e-02, 1.076e-01));
	r += mul(s1_8, M4(-6.640e-02, -2.167e-03, -7.802e-02, -6.291e-04, -5.879e-02, -1.432e-01, -1.221e-02, 4.027e-02, 1.564e-02, 1.884e-02, -3.517e-02, 2.630e-02, 7.528e-03, -1.925e-02, 4.156e-02, -1.512e-02));
	r += mul(s2_0, M4(-5.786e-02, -3.811e-02, 7.465e-02, -1.153e-01, 9.855e-03, 1.133e-03, -9.657e-02, -5.820e-02, -4.431e-02, -3.342e-02, 3.839e-03, 1.346e-02, 3.735e-03, -6.809e-03, -1.971e-02, -7.439e-02));
	r += mul(s2_1, M4(-2.957e-02, 2.258e-02, 9.420e-02, -4.162e-02, -2.241e-02, 3.396e-02, -9.463e-03, 6.093e-02, 2.241e-02, 5.466e-03, -1.272e-02, 9.590e-02, -2.979e-02, 1.120e-01, 2.448e-02, -3.680e-02));
	r += mul(s2_2, M4(6.455e-04, 3.673e-02, 7.003e-02, -6.360e-03, 3.664e-02, -6.810e-03, 2.913e-02, 8.731e-03, 9.219e-03, 7.756e-02, -1.472e-01, 1.450e-02, -3.781e-03, 3.535e-03, -6.173e-02, 3.913e-02));
	r += mul(s2_3, M4(4.824e-03, 3.512e-02, -1.684e-01, -4.602e-02, 4.232e-02, -2.799e-02, 9.374e-02, 2.816e-02, 7.564e-02, 1.042e-01, -8.223e-02, 2.283e-02, -7.351e-02, 3.973e-02, -1.391e-01, 2.565e-03));
	r += mul(s2_4, M4(-2.335e-01, 2.684e-02, -3.716e-01, -1.481e-02, 5.490e-02, 1.224e-01, -2.276e-02, 6.176e-02, 3.403e-02, 1.614e-01, 1.994e-02, 2.240e-01, -4.081e-02, 1.772e-02, 5.466e-02, 9.059e-02));
	r += mul(s2_5, M4(5.616e-02, -2.055e-02, -1.607e-01, 2.489e-02, -7.941e-02, 1.590e-01, -2.552e-02, 2.637e-02, 3.266e-03, -1.990e-02, 1.649e-01, 1.237e-01, 1.191e-01, -6.178e-02, 2.103e-01, -2.459e-02));
	r += mul(s2_6, M4(-4.669e-02, -3.467e-02, 2.145e-01, 6.600e-03, 9.879e-03, 2.595e-02, 1.483e-02, -3.995e-02, 1.454e-03, -4.725e-03, 4.796e-02, 6.020e-02, 5.114e-02, 7.967e-03, -8.423e-02, -1.184e-02));
	r += mul(s2_7, M4(8.979e-02, 2.301e-01, 2.273e-01, -2.155e-02, 2.495e-03, -3.534e-02, 5.817e-02, -4.511e-03, -1.832e-02, 2.070e-01, 7.383e-02, 6.175e-02, 4.585e-02, -6.402e-03, 1.827e-02, 1.981e-02));
	r += mul(s2_8, M4(1.809e-02, -7.041e-02, 3.777e-03, -9.913e-04, -8.198e-04, 6.589e-02, -9.252e-02, 4.381e-02, -2.878e-02, -3.744e-02, 1.090e-02, -6.804e-02, 1.619e-02, 1.085e-01, -1.391e-02, -4.922e-02));
	r += mul(s3_0, M4(-3.984e-02, 5.420e-03, -4.550e-02, -1.233e-01, 4.322e-02, -5.213e-02, 1.335e-02, -5.276e-03, -1.693e-02, -7.303e-03, 6.252e-03, -4.690e-02, 3.173e-02, 3.850e-02, -8.569e-02, 5.811e-02));
	r += mul(s3_1, M4(2.930e-02, 2.401e-02, -8.850e-02, -1.446e-01, -5.032e-02, 1.690e-01, 7.153e-02, 6.679e-02, -9.158e-02, 7.794e-02, -3.869e-02, 3.164e-02, -7.438e-02, 2.832e-02, 9.705e-02, -6.236e-02));
	r += mul(s3_2, M4(3.107e-02, 4.798e-02, 2.440e-02, -5.775e-02, 1.245e-02, -1.367e-01, 1.987e-01, 5.487e-02, -7.530e-02, -2.864e-02, -6.313e-02, 2.412e-02, -4.335e-02, -1.480e-02, 3.736e-02, 7.179e-02));
	r += mul(s3_3, M4(1.861e-02, 3.280e-02, -1.080e-01, -1.617e-02, 6.373e-02, -7.300e-02, 1.517e-03, -1.489e-02, 2.597e-02, -7.612e-02, 5.984e-02, -4.236e-02, -6.426e-03, -7.361e-02, 5.293e-02, -2.767e-02));
	r += mul(s3_4, M4(-1.651e-01, -9.867e-02, -5.652e-02, -5.221e-02, 8.410e-02, 1.038e-01, -7.153e-02, -2.243e-02, -2.833e-02, -4.937e-02, 2.125e-01, -5.315e-02, 5.610e-02, -1.936e-01, 4.265e-01, 2.646e-01));
	r += mul(s3_5, M4(-5.804e-02, 2.708e-02, 3.219e-02, 6.301e-02, -9.947e-02, 6.365e-02, -1.993e-01, -2.109e-02, 7.368e-03, 8.566e-02, -1.474e-02, 4.023e-02, 6.955e-02, -3.642e-02, 7.850e-02, 1.153e-02));
	r += mul(s3_6, M4(2.621e-02, 1.533e-02, 8.712e-02, -6.340e-02, 1.455e-03, -5.999e-03, 2.176e-01, 4.310e-02, -2.057e-02, -5.172e-02, 7.316e-02, -7.311e-02, -3.093e-02, 6.763e-02, -1.499e-01, 1.656e-02));
	r += mul(s3_7, M4(6.913e-02, 4.051e-02, 6.953e-02, -6.580e-02, 7.922e-03, 4.726e-02, 1.577e-01, 2.761e-02, 4.748e-02, -6.473e-02, 7.108e-02, -1.199e-02, -1.576e-02, 4.525e-02, 9.625e-03, 6.197e-02));
	r += mul(s3_8, M4(2.181e-02, 5.856e-02, -2.680e-02, -4.358e-03, 2.145e-02, -5.667e-02, 1.037e-01, 7.757e-03, 2.428e-02, -3.369e-02, 2.964e-02, -4.941e-02, 3.692e-02, 7.243e-02, 3.112e-02, -2.451e-02));
	r += mul(s4_0, M4(5.122e-03, -1.978e-03, 7.659e-02, 8.961e-02, 3.828e-02, -4.181e-02, -7.118e-02, 1.168e-02, 7.047e-02, -3.212e-03, 1.733e-03, 5.195e-02, 4.263e-02, 3.222e-02, -7.399e-02, 1.099e-02));
	r += mul(s4_1, M4(-1.358e-03, -7.553e-02, 1.239e-01, 6.855e-02, -8.082e-02, 2.287e-02, 1.084e-01, -1.193e-01, -7.322e-02, 1.251e-01, 1.231e-02, -1.121e-02, -9.672e-02, 2.728e-02, -5.787e-02, 3.771e-02));
	r += mul(s4_2, M4(1.228e-01, -2.115e-02, -5.478e-02, -9.391e-02, -7.383e-02, -1.139e-01, -2.159e-02, 1.082e-01, -5.576e-02, -8.820e-02, 8.714e-02, -7.996e-03, -1.793e-02, 4.947e-03, -4.892e-03, 9.143e-02));
	r += mul(s4_3, M4(4.650e-02, -1.056e-01, -1.316e-01, -9.701e-03, 8.937e-02, -7.974e-02, -1.177e-02, 7.090e-02, -2.849e-02, -2.004e-02, 4.543e-02, -4.452e-02, 7.714e-02, -7.697e-02, 1.959e-02, 7.751e-02));
	r += mul(s4_4, M4(-1.665e-01, 1.863e-01, -8.215e-02, -5.029e-02, -1.530e-02, 2.796e-01, 1.253e-01, -1.444e-01, -1.062e-01, 2.850e-02, -6.918e-03, -7.752e-02, -8.895e-02, -4.825e-03, 7.103e-02, 7.231e-02));
	r += mul(s4_5, M4(1.011e-01, 8.921e-02, -8.754e-02, -8.981e-02, -3.880e-02, -2.067e-01, 8.463e-02, -8.275e-02, 7.571e-02, -1.195e-02, 1.401e-02, -8.276e-02, 2.977e-02, 7.257e-02, 1.761e-01, 9.600e-02));
	r += mul(s4_6, M4(3.094e-02, -6.186e-03, 7.496e-02, 2.487e-02, -3.221e-02, -6.985e-03, 1.212e-01, -3.637e-02, -1.775e-02, 1.375e-02, -3.809e-02, -2.806e-02, -1.726e-03, 4.752e-02, -9.841e-03, -2.093e-02));
	r += mul(s4_7, M4(-6.665e-02, 8.968e-02, -1.947e-03, 7.916e-03, 1.457e-01, 9.335e-02, -1.535e-01, 5.746e-02, -3.287e-02, -1.573e-02, 4.369e-02, -4.585e-03, 1.116e-01, 8.898e-03, -6.204e-02, 6.182e-02));
	r += mul(s4_8, M4(5.083e-02, -3.205e-02, 4.880e-02, -4.987e-04, -2.765e-02, -7.642e-02, 7.199e-02, -1.001e-02, -4.057e-02, 2.807e-02, -1.059e-02, 3.827e-03, -6.015e-02, -4.428e-02, -1.895e-02, -2.997e-02));
	r += mul(s5_0, M4(1.595e-02, -4.492e-02, -6.387e-02, -7.062e-02, 8.614e-03, -1.412e-02, 5.415e-02, -3.994e-02, 1.212e-02, 2.353e-02, -5.643e-02, -9.880e-02, -9.901e-02, 3.936e-02, 3.183e-02, 1.666e-02));
	r += mul(s5_1, M4(2.284e-02, 9.285e-02, -7.087e-02, 1.962e-02, 1.836e-02, -9.249e-02, 7.274e-02, -3.925e-02, -1.148e-01, -3.381e-02, -1.147e-01, 1.551e-02, -1.443e-01, -1.351e-02, 2.367e-02, 7.472e-02));
	r += mul(s5_2, M4(-6.567e-02, -6.534e-04, -2.999e-02, 3.788e-02, 5.928e-02, 2.940e-02, -3.614e-02, -8.431e-02, 6.493e-03, 9.792e-02, 8.166e-02, 8.121e-03, 3.733e-03, -6.000e-02, 1.023e-02, -2.708e-02));
	r += mul(s5_3, M4(4.225e-02, -3.035e-02, -3.136e-02, -7.348e-02, -1.554e-02, 3.912e-02, -2.285e-01, -4.692e-02, 1.047e-01, -8.994e-02, 1.769e-01, 1.248e-01, -6.176e-02, 1.573e-02, -8.333e-02, 3.275e-02));
	r += mul(s5_4, M4(7.160e-02, 2.975e-01, 1.284e-01, -4.519e-02, 1.370e-01, -7.781e-02, -8.876e-02, 1.955e-02, -1.218e-01, 3.048e-01, 1.675e-01, 1.339e-01, -1.556e-01, 5.754e-02, 3.948e-02, 8.600e-02));
	r += mul(s5_5, M4(-2.356e-01, 7.179e-02, -8.412e-02, 1.549e-02, 8.241e-02, -2.647e-02, 7.431e-02, -4.565e-02, 1.053e-01, -1.426e-01, 1.563e-01, 8.870e-02, 1.115e-01, -2.496e-02, 3.625e-02, -9.573e-03));
	r += mul(s5_6, M4(2.063e-02, -1.548e-02, 9.447e-02, -9.163e-03, 4.279e-03, 7.593e-03, 4.727e-02, -1.522e-02, -3.840e-02, 8.830e-02, -7.936e-02, 3.788e-03, -9.664e-02, 6.229e-02, -1.123e-02, -2.040e-02));
	r += mul(s5_7, M4(4.598e-02, 1.724e-01, -9.667e-02, -3.743e-02, 7.593e-02, 8.019e-03, 7.071e-02, 2.795e-02, -1.156e-01, -3.966e-02, -1.742e-01, -7.907e-02, -2.258e-03, -8.263e-02, 8.969e-02, 9.902e-02));
	r += mul(s5_8, M4(6.340e-02, 5.018e-02, -9.251e-02, 1.755e-02, 2.021e-02, -7.445e-02, -7.248e-03, 3.071e-03, -4.418e-02, 1.510e-01, -5.434e-02, -4.775e-02, 8.281e-03, -7.794e-02, 1.005e-01, 2.240e-02));
	r += mul(s6_0, M4(-8.541e-02, -7.668e-03, 8.428e-02, -5.870e-02, -2.830e-03, 2.378e-02, 3.209e-02, 4.221e-02, -1.416e-02, -7.413e-02, -2.234e-03, 9.183e-03, 4.123e-02, 3.226e-03, 5.348e-02, 7.047e-03));
	r += mul(s6_1, M4(4.549e-02, -2.740e-02, 4.882e-02, 1.613e-02, -2.580e-02, 1.114e-02, 6.591e-02, -1.712e-02, 4.918e-02, 1.411e-01, -6.373e-03, 4.880e-02, -1.052e-01, 7.112e-02, 1.284e-01, -2.216e-02));
	r += mul(s6_2, M4(-5.023e-02, 6.376e-03, 1.275e-02, 7.544e-02, -2.007e-02, -2.550e-02, 7.617e-03, 1.943e-03, -3.676e-02, -2.099e-02, -3.072e-02, 6.026e-02, -3.776e-02, -3.612e-02, 1.224e-02, -3.338e-02));
	r += mul(s6_3, M4(-6.905e-02, -9.304e-02, -1.556e-01, -4.700e-02, 7.271e-03, 3.053e-03, -1.253e-01, 3.238e-02, 7.153e-02, 1.051e-02, -3.062e-02, 2.274e-02, -8.820e-02, 7.336e-03, -1.870e-01, -3.185e-02));
	r += mul(s6_4, M4(2.769e-01, -5.243e-02, -1.432e-01, 8.889e-02, -2.395e-03, -1.144e-01, 1.223e-01, -4.498e-02, -1.315e-01, 1.572e-01, -1.973e-01, 1.578e-01, 3.022e-01, 5.166e-02, -1.834e-01, 1.442e-03));
	r += mul(s6_5, M4(-4.584e-02, -1.975e-03, -1.124e-02, 1.870e-02, -3.355e-02, -6.602e-03, 4.581e-02, -2.536e-02, -1.210e-01, 1.289e-02, -1.664e-01, 4.967e-02, 1.213e-01, 6.296e-02, -1.414e-01, -5.444e-04));
	r += mul(s6_6, M4(-1.273e-02, -5.103e-02, 7.597e-02, -5.106e-03, 1.498e-03, -6.909e-02, -3.966e-02, -4.536e-03, -1.409e-02, 3.820e-02, 4.236e-03, 2.091e-02, 8.105e-03, -8.909e-03, -4.405e-02, 5.760e-02));
	r += mul(s6_7, M4(-8.759e-02, -8.420e-02, -1.538e-01, 1.251e-01, 8.604e-02, 8.704e-02, 2.454e-02, -5.437e-02, 1.268e-01, 1.532e-02, 4.255e-02, 4.514e-02, -7.425e-02, 5.124e-02, -1.197e-02, 1.105e-02));
	r += mul(s6_8, M4(-9.679e-02, -3.635e-02, -1.599e-01, 2.217e-02, 1.157e-02, 7.903e-02, -1.282e-02, 2.517e-02, 3.072e-03, 1.212e-02, -5.068e-02, 1.219e-02, -5.048e-02, -4.789e-02, 6.014e-02, -1.834e-02));
	r += mul(s7_0, M4(-7.395e-02, 4.417e-02, -7.326e-02, 1.657e-04, -8.550e-02, 5.602e-03, -2.309e-02, 1.407e-02, 2.018e-02, -4.896e-02, 5.191e-02, -1.649e-02, 1.707e-02, -9.629e-03, 1.198e-01, 5.186e-02));
	r += mul(s7_1, M4(9.423e-02, -3.370e-02, -5.891e-02, -2.028e-03, -7.932e-02, -3.480e-02, 4.558e-02, 8.408e-02, 7.433e-02, 2.920e-02, -1.659e-02, -4.810e-03, -1.731e-01, 6.637e-03, 1.876e-01, -3.583e-02));
	r += mul(s7_2, M4(-3.342e-02, 5.117e-02, -6.354e-02, -2.572e-02, -8.278e-02, 4.129e-02, 3.478e-02, 5.152e-02, 1.427e-02, -4.283e-02, 3.041e-02, -3.038e-02, -5.652e-02, -1.183e-01, 3.366e-02, 4.271e-02));
	r += mul(s7_3, M4(-1.862e-01, 6.683e-02, -8.013e-02, -1.807e-01, -6.053e-02, -1.974e-02, -1.750e-01, -2.887e-02, -3.272e-03, -6.210e-02, 2.414e-02, -1.185e-02, -6.046e-02, -6.312e-02, -5.798e-02, -7.354e-03));
	r += mul(s7_4, M4(-1.143e-01, -9.726e-03, -8.392e-02, 2.117e-01, -5.687e-02, -1.317e-01, 8.600e-02, -1.780e-03, -2.099e-02, 8.315e-02, -2.715e-02, 5.463e-02, 1.539e-01, 1.487e-01, -2.683e-01, 5.775e-02));
	r += mul(s7_5, M4(-1.194e-01, -3.820e-02, 4.998e-02, -4.698e-02, -9.662e-02, 5.332e-02, 6.678e-03, 1.984e-02, -5.187e-02, 3.933e-02, -2.790e-02, 2.727e-02, -1.750e-02, -5.128e-02, 4.539e-02, 6.119e-02));
	r += mul(s7_6, M4(8.464e-02, -5.824e-02, 6.962e-02, -2.679e-02, 3.461e-03, -1.479e-02, -4.726e-02, 7.719e-02, 3.804e-02, -1.091e-02, 1.558e-01, 8.211e-03, -3.342e-02, -5.296e-02, -5.517e-02, 3.337e-02));
	r += mul(s7_7, M4(3.936e-02, -9.005e-02, -2.283e-01, 4.386e-02, 2.988e-02, -1.306e-01, 3.540e-02, -1.752e-02, 6.241e-02, -8.067e-03, 5.707e-02, 2.101e-02, -2.648e-02, -6.828e-02, 6.354e-02, 4.999e-02));
	r += mul(s7_8, M4(1.137e-01, 1.752e-02, -1.071e-01, 1.907e-02, -6.962e-02, 2.038e-02, -8.363e-02, 5.959e-02, 5.774e-02, 3.732e-03, 1.098e-01, -3.748e-03, -1.016e-01, -4.212e-02, 8.611e-02, -2.438e-03));
	// Constant per-channel offset added after all products (presumably the layer bias).
	r += V4(3.150e-02, 5.513e-03, -4.366e-02, 3.548e-03);
	return r;
}

// f1: same structure as a 72-term weighted sum — each V4 argument sK_J
// (K = 0..7, J = 0..8) is multiplied by its own constant 4x4 matrix via
// mul(vector, matrix), the products are accumulated in the order listed, and a
// constant V4 is added at the end.
// NOTE(review): presumably J is the tap index within a 3x3 neighbourhood and K
// the input feature group; the caller that builds the arguments is outside
// this view, so confirm there.
//
// Returns: the accumulated V4. No clamping or activation is applied here.
// NOTE(review): generated weights; keep the term order as-is, since the
// accumulation is done in min16float and order can affect rounding.
V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) {
	// Accumulator, starts at zero in all four channels.
	V4 r = 0.0;
	r += mul(s0_0, M4(7.776e-02, -8.490e-02, -2.067e-02, 5.440e-02, 1.813e-02, 1.135e-01, 2.268e-02, 3.435e-02, -4.555e-02, -1.398e-01, -8.772e-02, -3.666e-02, -7.805e-02, 5.617e-02, -8.735e-04, -8.572e-04));
	r += mul(s0_1, M4(-3.737e-02, -1.494e-01, -3.927e-04, 3.512e-02, -1.903e-02, 3.908e-02, 2.908e-02, 9.508e-02, -1.152e-02, -1.174e-01, -1.210e-02, -8.811e-02, 1.220e-02, 1.813e-01, 5.844e-03, -1.913e-03));
	r += mul(s0_2, M4(3.612e-02, -7.414e-02, -2.258e-02, 4.371e-02, 1.355e-02, -1.887e-02, -3.716e-03, 2.872e-02, -5.748e-02, -3.006e-02, 2.186e-02, 9.390e-02, 5.408e-04, 7.620e-02, 2.978e-02, -4.244e-02));
	r += mul(s0_3, M4(4.609e-02, 4.968e-02, -8.155e-02, -8.411e-02, 6.703e-02, -1.381e-01, -6.787e-02, 1.911e-02, -4.370e-02, 5.156e-03, -2.329e-02, -1.051e-01, 3.994e-02, -2.949e-02, -3.222e-02, -7.972e-02));
	r += mul(s0_4, M4(1.459e-01, 1.109e-01, -5.734e-02, 1.614e-01, -1.681e-01, 3.539e-03, -9.288e-02, -1.257e-01, -1.014e-01, 5.845e-02, -2.211e-01, -1.245e-02, 1.704e-01, -1.163e-01, -1.202e-01, -1.136e-01));
	r += mul(s0_5, M4(-5.593e-02, 8.014e-02, 4.557e-02, 1.334e-02, 5.387e-02, -2.346e-02, 4.065e-02, -3.965e-02, -3.307e-02, 1.917e-02, 4.113e-02, -3.794e-02, -4.636e-02, -6.311e-02, -6.138e-02, 1.189e-01));
	r += mul(s0_6, M4(-2.809e-02, 2.056e-02, 3.268e-02, -1.146e-01, 1.179e-01, 9.341e-02, -6.772e-02, -1.389e-01, -1.195e-01, 8.743e-02, 1.178e-01, -9.008e-02, -7.854e-02, 5.421e-02, -3.389e-02, 3.070e-02));
	r += mul(s0_7, M4(-6.888e-02, -3.600e-02, 9.016e-02, -3.882e-02, -1.024e-01, -4.963e-02, 6.431e-02, -2.895e-02, 6.086e-02, -1.285e-01, 1.493e-01, 9.648e-02, 2.547e-01, 1.457e-01, -1.138e-01, 5.216e-02));
	r += mul(s0_8, M4(1.205e-02, -1.702e-02, 5.149e-02, 7.429e-03, 8.371e-02, 4.814e-03, 2.502e-02, -9.301e-02, 3.953e-02, 2.052e-02, -1.253e-03, -1.018e-03, -1.161e-01, 1.265e-02, 4.431e-02, -8.109e-02));
	r += mul(s1_0, M4(1.480e-01, -1.260e-01, 4.616e-02, 4.163e-02, 4.978e-03, 1.068e-02, -1.285e-02, 2.300e-02, -1.355e-03, -2.255e-02, -6.099e-03, 5.602e-02, 1.958e-02, 4.921e-03, -2.415e-02, 5.545e-03));
	r += mul(s1_1, M4(-1.063e-01, 4.527e-02, -4.872e-02, 7.474e-02, 5.204e-02, 2.735e-02, -1.533e-02, -1.948e-02, -1.332e-01, -1.691e-02, -1.712e-03, -6.606e-02, -2.148e-02, -6.268e-02, -4.682e-02, -1.285e-02));
	r += mul(s1_2, M4(-4.936e-02, 8.319e-03, 2.232e-02, -4.762e-02, -5.726e-03, -1.410e-02, 3.006e-02, -5.014e-03, 2.384e-02, -1.393e-02, 1.334e-02, 1.541e-02, 2.470e-02, -1.395e-02, 1.095e-02, -3.093e-02));
	r += mul(s1_3, M4(9.590e-02, 7.083e-02, -1.208e-01, -4.706e-02, -1.534e-01, -5.825e-03, 2.298e-02, -1.212e-01, 6.187e-03, -4.588e-03, -8.587e-02, -3.169e-03, -1.830e-02, 1.651e-02, -1.949e-02, -1.584e-02));
	r += mul(s1_4, M4(-2.469e-01, 4.538e-02, -1.953e-01, -1.554e-01, -7.490e-02, -3.285e-01, 1.109e-01, -7.588e-03, 1.271e-01, 2.494e-03, 6.129e-02, 5.532e-02, -4.167e-02, 2.438e-01, -2.073e-01, -2.021e-01));
	r += mul(s1_5, M4(1.644e-02, 1.013e-02, 4.682e-02, 4.437e-02, 7.156e-02, -3.308e-02, -3.563e-02, 9.885e-02, -4.479e-02, 1.694e-02, -1.865e-02, -8.136e-03, 2.330e-02, 1.699e-02, -1.056e-01, 1.087e-01));
	r += mul(s1_6, M4(-6.609e-02, 2.807e-02, 3.110e-02, -8.848e-02, 2.765e-02, 8.620e-02, 3.319e-02, -5.906e-02, 8.444e-02, 2.822e-02, 1.107e-02, -3.552e-02, -9.031e-02, 1.956e-02, -5.127e-02, -5.593e-03));
	r += mul(s1_7, M4(1.399e-01, -4.111e-02, 2.611e-02, 4.028e-02, -1.003e-02, 1.235e-01, -1.821e-02, -1.066e-02, 3.003e-02, -9.736e-03, 1.179e-01, -3.768e-02, 4.234e-02, 5.035e-02, -2.162e-01, -4.588e-02));
	r += mul(s1_8, M4(1.795e-02, -3.291e-02, 6.307e-03, 5.110e-02, -1.168e-01, 7.285e-03, -6.830e-02, 3.189e-02, 6.169e-02, 7.242e-03, 2.605e-02, -2.164e-02, -1.964e-02, -3.577e-02, -4.418e-02, -3.246e-02));
	r += mul(s2_0, M4(-3.049e-02, 3.876e-02, 1.798e-02, 1.660e-02, 5.262e-03, -7.100e-03, -2.275e-04, -2.742e-02, -5.409e-02, 1.216e-01, 1.681e-02, -8.374e-02, 1.909e-01, -1.592e-02, -2.350e-02, 4.850e-02));
	r += mul(s2_1, M4(-3.809e-02, 7.727e-02, -1.538e-02, -7.747e-02, -1.638e-02, 3.303e-03, -4.032e-02, -1.914e-02, 6.667e-02, -1.473e-01, 9.159e-02, 2.177e-02, 7.800e-02, -2.749e-02, 2.020e-02, -7.160e-02));
	r += mul(s2_2, M4(3.598e-02, -6.197e-02, 1.842e-02, -8.793e-03, -1.018e-02, 5.654e-02, 5.311e-02, 2.156e-02, 6.909e-02, -4.899e-02, -7.643e-04, 5.588e-02, -2.846e-02, 1.155e-02, -6.372e-02, -4.291e-02));
	r += mul(s2_3, M4(3.729e-02, 1.007e-01, 2.525e-02, -1.302e-01, -1.402e-02, -3.402e-02, -2.153e-02, 9.281e-02, 1.373e-01, -5.732e-02, -3.573e-02, -3.186e-02, -4.562e-02, -1.542e-01, -2.256e-02, 2.462e-02));
	r += mul(s2_4, M4(1.297e-01, -8.224e-02, -2.092e-01, 2.684e-01, -2.339e-03, 2.797e-02, 3.087e-02, 1.398e-01, 9.239e-02, 7.381e-02, -8.860e-02, -5.791e-02, 6.191e-02, 5.237e-02, 7.781e-02, 2.119e-01));
	r += mul(s2_5, M4(-4.760e-02, 1.027e-02, 3.280e-02, -1.385e-02, 5.485e-02, 3.425e-02, -8.161e-02, 1.306e-01, -7.043e-04, -1.177e-02, 5.946e-02, -7.133e-02, -3.342e-02, -6.026e-02, 2.064e-02, -1.492e-02));
	r += mul(s2_6, M4(-9.405e-02, -4.250e-02, 1.683e-02, 7.080e-03, 2.519e-02, 1.549e-02, 3.318e-02, -1.970e-02, 8.231e-02, -8.037e-02, -7.925e-02, -6.067e-03, 3.509e-03, -4.632e-02, 5.849e-02, 7.958e-02));
	r += mul(s2_7, M4(1.424e-01, 5.223e-02, 3.317e-02, -2.217e-01, 5.145e-02, 1.991e-02, 3.337e-03, -8.977e-02, 6.504e-02, -5.637e-02, 1.894e-01, 6.373e-02, -8.009e-02, -2.898e-02, -2.254e-02, 5.607e-02));
	r += mul(s2_8, M4(4.897e-02, -2.811e-02, 4.906e-02, 4.026e-02, -1.230e-01, -5.932e-02, 4.591e-03, 4.566e-02, -9.466e-02, -2.565e-02, 3.836e-02, 5.090e-02, 2.224e-02, 4.254e-02, -7.822e-02, -1.107e-02));
	r += mul(s3_0, M4(-1.730e-02, -3.316e-02, 3.565e-02, -2.075e-02, -1.353e-02, 7.812e-03, 9.693e-03, -2.420e-02, -1.212e-02, -9.251e-03, -1.824e-02, -5.754e-02, 4.554e-02, 6.043e-02, 2.870e-02, 3.500e-02));
	r += mul(s3_1, M4(-7.212e-02, 2.119e-02, 3.419e-02, 8.373e-02, -3.011e-02, 1.856e-02, -1.155e-01, -2.185e-03, 7.813e-02, -2.907e-02, -3.720e-02, 3.578e-02, -1.057e-01, -8.175e-03, -5.053e-02, 2.420e-02));
	r += mul(s3_2, M4(9.529e-02, 1.016e-01, 3.172e-02, -2.386e-02, -8.391e-02, 6.843e-02, -9.736e-03, -9.194e-02, 1.399e-02, -8.978e-03, 2.900e-02, -6.146e-02, -6.319e-02, -1.560e-03, -9.177e-02, -6.033e-02));
	r += mul(s3_3, M4(2.028e-02, 1.315e-02, 5.543e-02, 7.761e-02, -8.352e-02, 6.295e-03, -6.043e-03, 9.804e-02, -3.626e-02, -5.410e-02, -2.883e-02, -1.200e-02, -2.720e-03, -3.627e-02, 6.209e-02, -1.272e-02));
	r += mul(s3_4, M4(1.698e-02, -1.959e-03, -1.426e-01, -7.658e-02, -5.019e-03, 7.411e-02, 1.118e-01, 1.282e-01, 8.223e-02, 2.384e-01, -8.747e-02, -1.974e-01, 2.444e-02, -6.515e-02, 5.985e-02, -2.092e-01));
	r += mul(s3_5, M4(-5.460e-03, -2.205e-02, 8.765e-02, -6.076e-02, -5.173e-02, -5.875e-02, -8.130e-02, 2.187e-01, 1.908e-02, -2.804e-02, -4.912e-03, -1.549e-02, 3.233e-03, -2.002e-02, 4.922e-02, 1.501e-01));
	r += mul(s3_6, M4(4.957e-02, -3.018e-02, 1.195e-02, -1.627e-02, -2.495e-02, 2.673e-02, 3.557e-02, -1.834e-02, -3.139e-02, 1.945e-01, -4.287e-02, -2.590e-01, 6.648e-02, -1.035e-01, 1.978e-02, 2.497e-01));
	r += mul(s3_7, M4(4.261e-02, -4.467e-03, 9.605e-02, -1.084e-01, -4.150e-02, 8.793e-03, -4.267e-02, -6.823e-02, -4.787e-02, -1.536e-01, 1.780e-01, 1.504e-02, -8.017e-02, 6.095e-02, -1.297e-01, 1.903e-01));
	r += mul(s3_8, M4(3.615e-02, -5.108e-02, 3.453e-02, 1.011e-01, -7.597e-02, -6.015e-03, 1.119e-02, -3.885e-02, 2.361e-02, 3.135e-02, -5.040e-02, -3.085e-02, 3.696e-02, 7.501e-02, -2.058e-02, -1.242e-01));
	r += mul(s4_0, M4(4.920e-02, -7.762e-02, -1.594e-02, 6.495e-02, -6.660e-03, 6.573e-02, 5.427e-02, -1.234e-01, 4.898e-02, 6.072e-03, 4.608e-02, 5.062e-02, 9.321e-02, -5.075e-03, -1.583e-02, 6.021e-03));
	r += mul(s4_1, M4(5.049e-02, 7.086e-02, -7.803e-02, -2.777e-03, 1.827e-01, -1.059e-01, -8.425e-02, 1.674e-01, 9.989e-03, -5.947e-02, 7.359e-03, 1.393e-01, -1.248e-01, 6.799e-02, -1.108e-01, -3.081e-02));
	r += mul(s4_2, M4(-9.983e-02, -3.100e-03, 5.232e-02, 3.045e-02, 7.090e-02, -1.493e-02, 3.596e-02, -6.723e-02, -3.310e-02, 3.425e-02, -6.720e-02, -7.788e-02, -2.674e-02, -1.370e-02, 6.114e-02, -7.593e-02));
	r += mul(s4_3, M4(-1.041e-01, 7.343e-02, 5.043e-02, -8.283e-03, -3.446e-02, -7.448e-02, 1.155e-01, 9.511e-02, -4.565e-03, -7.229e-03, -4.273e-02, 5.366e-02, -1.361e-01, -4.208e-02, 1.054e-01, 1.825e-01));
	r += mul(s4_4, M4(1.120e-01, 1.808e-02, -1.422e-01, 4.162e-02, 3.161e-01, 8.619e-02, 1.298e-01, -2.176e-01, 7.566e-02, -1.184e-01, -4.221e-02, -6.631e-03, -1.206e-02, 2.938e-02, -1.332e-01, 4.335e-02));
	r += mul(s4_5, M4(2.097e-01, 3.137e-02, 1.642e-01, -1.393e-01, -2.272e-01, -7.897e-02, -9.002e-02, 5.244e-02, -2.101e-02, -5.433e-02, 1.045e-01, -1.250e-02, 4.406e-02, 1.994e-02, -2.391e-02, 6.093e-02));
	r += mul(s4_6, M4(-6.634e-02, -6.862e-03, 8.092e-03, 3.152e-02, 9.126e-02, 3.181e-02, -2.699e-02, -2.576e-02, 2.686e-02, -5.285e-02, 1.551e-02, 1.751e-02, 1.191e-01, 8.773e-02, 1.456e-02, -1.665e-01));
	r += mul(s4_7, M4(-2.891e-02, -3.612e-02, -5.749e-02, 2.471e-02, 2.772e-01, -1.306e-01, 8.702e-02, 1.480e-01, -4.810e-02, 4.675e-02, -4.128e-03, -1.958e-02, 5.197e-02, -6.999e-02, 9.079e-02, 7.055e-02));
	r += mul(s4_8, M4(9.988e-02, 1.870e-02, 8.131e-02, -2.260e-02, -8.316e-02, 8.149e-03, 2.334e-02, -5.434e-03, 5.043e-02, 2.868e-02, -2.653e-02, 6.189e-02, -3.698e-02, 3.083e-02, -3.091e-02, 2.374e-03));
	r += mul(s5_0, M4(5.254e-02, -7.315e-02, -2.225e-02, 2.153e-02, 5.725e-02, -4.636e-02, 1.601e-03, 4.516e-02, 8.068e-02, 2.729e-02, -3.870e-02, -7.201e-02, 6.760e-02, 4.552e-03, 4.438e-02, 4.500e-02));
	r += mul(s5_1, M4(1.685e-01, 1.509e-01, 2.948e-03, 1.776e-02, 1.675e-02, 6.696e-02, -5.363e-02, -6.286e-03, 6.024e-02, 3.023e-03, -5.779e-02, 5.292e-03, -1.165e-01, 7.991e-02, -1.851e-02, 4.630e-04));
	r += mul(s5_2, M4(-7.170e-02, 1.589e-02, -1.884e-02, -3.883e-02, 7.200e-02, -1.746e-02, -2.195e-02, 9.030e-02, 1.058e-01, -8.948e-02, -1.143e-01, 6.897e-02, -6.149e-02, 1.574e-03, 4.231e-02, 1.363e-02));
	r += mul(s5_3, M4(3.745e-02, 1.247e-01, 2.019e-02, -3.741e-02, -3.449e-03, -4.847e-03, 1.767e-03, 5.781e-03, -7.935e-02, -2.522e-01, 3.269e-02, 1.529e-01, 3.102e-01, -1.893e-01, -6.841e-02, 3.808e-02));
	r += mul(s5_4, M4(1.921e-01, -7.191e-02, 1.048e-01, 6.779e-03, 6.828e-02, 2.094e-02, 4.389e-02, 5.065e-02, -1.595e-02, -2.677e-01, -7.935e-02, -1.236e-01, 1.635e-01, -4.481e-02, 4.704e-02, 1.869e-01));
	r += mul(s5_5, M4(6.662e-03, 6.683e-02, -9.824e-02, 1.703e-01, -2.904e-02, 5.726e-02, -3.509e-02, -1.256e-01, -1.423e-01, 9.473e-02, -1.299e-01, 1.175e-01, -3.071e-02, -2.751e-02, 4.384e-02, -8.192e-02));
	r += mul(s5_6, M4(-1.580e-02, -3.752e-02, 4.568e-02, 7.775e-03, 2.727e-02, 7.825e-03, -1.738e-02, -9.195e-03, -3.736e-02, -6.091e-02, 3.713e-02, 6.820e-02, 1.742e-02, 1.099e-02, -2.703e-02, -4.711e-02));
	r += mul(s5_7, M4(-2.568e-02, -3.585e-02, -8.110e-05, -1.047e-02, 1.640e-02, -2.770e-02, 2.889e-02, 3.152e-02, 8.110e-02, -1.274e-01, 6.282e-02, 1.401e-01, 1.375e-02, -5.635e-02, 4.940e-02, 5.975e-02));
	r += mul(s5_8, M4(4.971e-02, -3.187e-02, 5.470e-02, -5.790e-02, 2.007e-02, 1.442e-02, -5.515e-02, 8.206e-02, 9.185e-03, -2.850e-02, -4.617e-02, -2.761e-02, 7.174e-02, -2.299e-03, -8.726e-03, -4.167e-02));
	r += mul(s6_0, M4(-2.564e-02, -1.334e-03, 1.618e-02, -3.692e-02, 1.668e-02, -1.014e-02, 3.184e-02, -8.247e-05, -5.784e-02, -6.551e-02, -1.474e-02, -1.508e-02, 1.280e-02, 1.104e-02, 1.940e-02, -2.254e-02));
	r += mul(s6_1, M4(1.281e-02, 2.432e-02, -3.916e-02, -8.733e-02, 2.112e-02, -1.650e-02, -5.389e-02, 2.289e-02, 6.510e-02, -1.273e-01, 8.516e-02, -4.619e-02, 7.458e-02, -4.286e-02, -1.097e-01, -4.328e-02));
	r += mul(s6_2, M4(2.650e-02, -1.040e-02, 7.558e-03, -1.189e-02, -6.083e-03, -3.193e-03, -9.984e-03, -3.848e-02, -2.653e-02, -7.239e-02, 2.185e-02, 1.225e-02, -3.582e-02, -2.979e-02, -6.728e-02, -6.881e-02));
	r += mul(s6_3, M4(-3.437e-02, 6.707e-02, 3.284e-02, -1.281e-01, 2.801e-02, 2.978e-03, 2.879e-02, -7.024e-03, -5.116e-02, 3.863e-02, -5.467e-02, 8.941e-02, 5.295e-02, -1.732e-01, -1.607e-01, -6.141e-03));
	r += mul(s6_4, M4(-5.183e-02, 9.150e-02, 2.158e-01, 1.855e-01, -1.774e-01, 4.494e-02, -3.685e-02, -2.582e-02, 1.537e-01, -1.493e-02, -8.099e-02, 1.548e-01, 8.523e-02, -1.556e-02, 1.480e-01, 1.893e-01));
	r += mul(s6_5, M4(1.187e-01, 5.356e-02, -6.313e-02, 9.672e-02, 2.376e-03, 1.626e-02, 6.959e-03, -9.679e-02, 5.677e-03, -3.208e-02, -9.106e-02, 1.793e-01, 1.366e-02, -1.316e-02, 6.110e-02, -5.431e-02));
	r += mul(s6_6, M4(-3.338e-03, 8.092e-02, 6.916e-02, -1.039e-01, -6.471e-02, 8.988e-03, 5.310e-02, 7.353e-02, 1.128e-02, 3.089e-02, -2.249e-02, 1.239e-02, -6.665e-02, -3.720e-02, -2.291e-02, 5.894e-02));
	r += mul(s6_7, M4(1.134e-01, 1.221e-02, -6.070e-02, 2.300e-01, -8.134e-02, 3.754e-02, 9.095e-02, -3.804e-02, 7.501e-02, -1.004e-01, 1.162e-01, 5.006e-03, 4.744e-02, 1.739e-02, -1.323e-01, 8.227e-02));
	r += mul(s6_8, M4(-1.192e-01, -2.425e-02, -9.741e-03, 1.033e-02, 1.348e-02, 4.238e-03, 5.134e-02, 2.432e-02, -3.472e-02, 2.930e-02, -1.485e-02, -6.624e-03, 2.464e-02, -6.743e-03, -5.677e-02, -1.125e-02));
	r += mul(s7_0, M4(2.961e-04, 5.695e-02, 1.692e-02, 1.641e-02, 1.896e-02, 3.423e-02, 3.964e-02, -4.835e-02, -7.153e-02, -1.021e-02, -1.489e-02, -4.491e-02, 6.248e-03, 6.234e-02, 3.034e-02, -4.721e-02));
	r += mul(s7_1, M4(4.580e-02, -4.302e-03, 5.372e-02, 1.092e-01, 9.311e-02, 7.429e-02, -7.219e-02, 1.315e-02, -4.621e-02, -3.435e-02, 8.618e-02, 6.339e-04, -5.809e-02, -5.998e-02, -8.710e-02, -5.751e-02));
	r += mul(s7_2, M4(4.485e-02, 2.007e-02, 3.034e-02, 5.546e-02, -1.602e-02, -3.560e-02, -2.043e-02, 4.087e-02, 4.749e-02, -8.724e-04, 4.623e-02, 1.840e-02, -3.697e-02, 8.748e-03, -3.764e-03, -1.345e-01));
	r += mul(s7_3, M4(1.538e-01, -1.023e-01, -1.091e-01, -1.067e-01, 1.039e-01, 2.164e-03, -3.211e-02, 3.243e-02, -7.896e-02, 8.814e-02, -3.925e-02, -7.875e-02, 5.796e-03, -5.906e-02, -7.718e-02, 3.024e-02));
	r += mul(s7_4, M4(-1.636e-01, -7.883e-02, -2.044e-01, 3.189e-02, -2.025e-01, -3.092e-02, -2.128e-01, 3.520e-02, -5.502e-03, 7.618e-02, -1.111e-01, 4.137e-02, 2.036e-01, -2.299e-02, 2.865e-02, 1.354e-01));
	r += mul(s7_5, M4(1.363e-01, -4.054e-02, -2.335e-02, 2.584e-02, 3.570e-02, 8.628e-02, 1.420e-02, -1.352e-02, -3.313e-03, -4.462e-03, -1.690e-03, 3.997e-02, -1.273e-01, -3.589e-03, -4.635e-02, -2.387e-02));
	r += mul(s7_6, M4(8.699e-02, 7.315e-03, 4.443e-02, 1.327e-02, -7.019e-02, -1.051e-02, 4.505e-02, 4.059e-02, -5.563e-02, 3.547e-02, 8.318e-03, -7.002e-02, -3.911e-02, 1.473e-02, -2.082e-02, -4.584e-03));
	r += mul(s7_7, M4(2.100e-03, -4.188e-02, 1.642e-01, 2.146e-01, 5.512e-02, 1.341e-01, -6.786e-02, -2.989e-02, -6.685e-02, -7.217e-02, 1.433e-01, -1.345e-02, -5.557e-02, 2.869e-02, -8.787e-02, 5.125e-02));
	r += mul(s7_8, M4(9.447e-02, -1.057e-02, 9.580e-02, 3.739e-03, -6.323e-02, -3.503e-03, 1.983e-02, 7.549e-03, -5.925e-02, -3.357e-02, 2.204e-03, 2.899e-02, -6.129e-02, 1.970e-02, -2.830e-02, -5.696e-02));
	// Constant per-channel offset added after all products (presumably the layer bias).
	r += V4(2.020e-02, -4.553e-03, -2.459e-02, 1.937e-02);
	return r;
}

// Pass 8, third output group: Pass8 stores this function's result in t6.
//
// Arguments are 8 groups (s0..s7) of 9 taps each. As Pass8 builds them, the tap
// suffix _0.._8 walks the 3x3 neighbourhood row by row from offset (-1,-1) to
// (1,1); even groups (s0, s2, s4, s6) hold max(v, 0) and odd groups
// (s1, s3, s5, s7) hold -max(-v, 0) of the l0, l1, l2, l3 samples respectively.
//
// Every tap is multiplied by its own constant 4x4 matrix (the tap is the first
// argument of mul), the 72 products are accumulated into r in the order
// written, and a constant V4 is added last.
// NOTE(review): the constants look like generated network weights and a bias —
// confirm against the generator before changing any of them by hand. The
// accumulation happens in V4 (min16float4), so reordering the statements may
// change rounding.
V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) {
	V4 r = 0.0;
	// s0_*: max(l0, 0) taps
	r += mul(s0_0, M4(1.077e-01, 9.312e-02, -5.188e-02, 7.384e-02, -5.968e-02, 2.927e-03, -3.236e-02, -5.140e-02, -3.015e-02, -3.844e-02, 8.994e-02, -1.180e-01, -9.557e-02, -4.035e-02, 4.019e-02, -4.095e-02));
	r += mul(s0_1, M4(-6.296e-02, -4.241e-02, -6.281e-03, -6.678e-02, 9.068e-03, 3.762e-02, 1.032e-01, -4.027e-02, 2.218e-02, 6.294e-02, -1.689e-02, 1.324e-01, -1.670e-02, -7.119e-02, -5.797e-02, -4.215e-03));
	r += mul(s0_2, M4(-6.613e-02, -1.290e-02, 1.807e-02, 4.846e-04, 4.511e-02, 4.319e-02, -1.287e-02, 1.502e-02, -3.730e-02, -1.057e-02, 2.362e-02, -5.695e-02, -5.287e-03, 1.501e-02, -1.542e-02, -1.826e-02));
	r += mul(s0_3, M4(-3.606e-02, 1.462e-01, 2.109e-01, -2.420e-02, 1.386e-03, -5.487e-02, 5.800e-03, 4.513e-02, -1.422e-01, -8.521e-02, 2.863e-01, -2.422e-01, 2.749e-02, -1.105e-01, 1.328e-03, -2.261e-01));
	r += mul(s0_4, M4(1.142e-01, 4.361e-02, 4.532e-02, 2.146e-03, -1.481e-01, -1.284e-01, -3.127e-02, -9.069e-02, -1.411e-01, -2.611e-02, -2.912e-01, 2.501e-01, 1.544e-01, -3.091e-02, -1.803e-01, -5.694e-02));
	r += mul(s0_5, M4(-5.748e-02, -8.738e-02, -1.973e-02, 4.722e-02, -3.113e-02, -7.124e-03, 8.854e-03, -1.669e-02, 6.880e-02, 2.122e-02, -5.463e-02, 1.973e-03, -6.377e-02, -5.127e-03, -3.509e-02, 5.819e-02));
	r += mul(s0_6, M4(5.161e-02, 6.178e-02, -6.580e-03, 7.210e-02, 1.012e-01, 1.839e-02, 5.339e-02, 2.248e-02, 6.152e-02, 3.338e-02, 4.869e-03, -1.636e-01, -5.381e-02, -7.649e-02, 3.155e-02, -9.452e-02));
	r += mul(s0_7, M4(3.837e-02, -2.835e-03, 9.076e-02, -8.489e-04, 6.508e-02, 2.648e-02, -6.215e-02, 8.233e-02, 2.223e-02, -6.067e-02, 5.139e-02, -4.934e-02, -9.968e-02, 6.218e-02, 2.873e-03, -6.527e-02));
	r += mul(s0_8, M4(5.733e-02, 1.893e-02, -6.683e-02, 4.339e-02, -4.547e-02, 2.725e-02, 1.997e-02, -1.831e-02, 3.253e-02, 5.693e-02, -3.447e-02, 3.470e-02, 6.988e-02, -1.481e-01, 4.093e-02, 4.865e-02));
	// s1_*: -max(-l0, 0) taps
	r += mul(s1_0, M4(1.218e-01, 1.391e-01, -9.917e-02, 2.681e-01, -6.814e-03, -4.386e-02, -1.731e-02, -4.896e-02, -4.263e-03, 7.133e-03, -2.855e-02, 1.162e-01, -6.505e-02, -3.310e-02, 4.968e-02, -7.657e-02));
	r += mul(s1_1, M4(-1.714e-01, -3.825e-02, 1.989e-03, -8.984e-02, 4.135e-02, 3.992e-02, -8.609e-03, -6.335e-02, -3.296e-02, -3.458e-02, 4.691e-03, 1.030e-01, 2.207e-02, -1.372e-02, -2.782e-02, 7.251e-02));
	r += mul(s1_2, M4(1.158e-02, 2.207e-02, 1.008e-02, -1.122e-02, 5.151e-02, 3.024e-02, -4.144e-02, 5.029e-02, 6.255e-02, 6.074e-03, 3.316e-02, -3.334e-02, 9.460e-04, 3.999e-02, 4.258e-02, -1.319e-02));
	r += mul(s1_3, M4(-6.833e-03, 2.981e-01, 1.190e-01, 4.172e-01, -2.082e-01, -1.010e-01, 1.663e-01, 2.756e-02, 1.126e-02, 6.412e-02, 5.292e-02, 3.232e-02, 5.397e-02, -6.604e-02, -8.024e-02, -3.327e-02));
	r += mul(s1_4, M4(1.082e-01, -7.797e-02, 3.743e-01, -2.278e-02, 1.222e-01, 3.582e-02, 3.057e-01, -1.525e-01, 3.195e-02, 3.684e-03, 1.318e-01, -1.431e-02, -1.279e-01, -1.193e-01, -9.534e-02, 1.111e-02));
	r += mul(s1_5, M4(-7.195e-02, -8.329e-03, 2.136e-02, 1.546e-02, -3.515e-02, -3.558e-02, 3.955e-02, 8.360e-02, -1.101e-01, -1.174e-01, -3.039e-02, -2.660e-02, -1.078e-02, -1.269e-02, -1.314e-01, 7.183e-02));
	r += mul(s1_6, M4(5.152e-02, 5.627e-02, -4.271e-02, 1.356e-01, 1.291e-01, -1.450e-01, -1.893e-02, 1.868e-01, 6.525e-02, 5.429e-02, 1.105e-01, -9.555e-02, -7.097e-02, 1.332e-02, 3.753e-02, -4.533e-03));
	r += mul(s1_7, M4(3.159e-02, 5.370e-02, 1.118e-01, -2.614e-02, 1.104e-01, 4.714e-03, 8.088e-02, -1.254e-01, 5.789e-02, 1.972e-02, -2.895e-02, 1.946e-02, 5.596e-02, 1.046e-01, 3.249e-02, -6.347e-02));
	r += mul(s1_8, M4(4.930e-02, 2.392e-02, -3.323e-02, 3.137e-02, -1.039e-01, -1.100e-01, 1.549e-02, 1.914e-02, 5.477e-02, 1.615e-03, 6.294e-02, -3.471e-02, 6.569e-02, -4.273e-02, 1.651e-02, 9.589e-03));
	// s2_*: max(l1, 0) taps
	r += mul(s2_0, M4(3.221e-02, 7.105e-02, 2.089e-02, 2.255e-02, -6.648e-03, -2.093e-02, 6.522e-03, -2.318e-02, -8.746e-03, 5.476e-02, -3.258e-02, -1.102e-01, 1.749e-01, 2.147e-02, -2.678e-02, -5.268e-02));
	r += mul(s2_1, M4(-1.864e-02, -4.252e-04, -8.805e-02, -4.201e-02, -8.959e-02, -2.189e-02, 2.673e-02, 4.537e-02, 2.190e-01, 1.311e-01, 5.191e-03, 4.206e-02, -2.308e-02, -9.327e-02, -3.661e-02, -1.148e-01));
	r += mul(s2_2, M4(-2.992e-02, 3.498e-02, 5.053e-02, 1.144e-02, 3.595e-02, 1.984e-02, 1.643e-02, -2.382e-02, 3.988e-02, 4.426e-02, 1.589e-02, 3.642e-02, 6.817e-03, 9.914e-03, -7.543e-02, 4.210e-02));
	r += mul(s2_3, M4(-1.351e-01, 1.527e-01, 2.414e-03, -2.222e-01, 9.301e-03, -4.186e-04, -7.728e-03, -1.186e-01, 7.279e-04, 4.599e-02, 1.162e-02, -1.156e-01, -9.149e-03, 2.266e-02, 3.615e-02, -1.548e-01));
	r += mul(s2_4, M4(1.148e-01, 1.567e-01, -2.782e-01, 3.847e-03, -2.695e-02, -1.642e-03, -8.825e-02, -1.578e-02, 1.793e-01, 2.009e-01, -2.151e-01, -1.173e-01, -1.997e-02, -3.643e-02, 1.601e-02, 3.910e-02));
	r += mul(s2_5, M4(4.336e-02, 8.253e-02, 1.158e-02, -1.455e-02, -5.475e-02, -1.940e-02, -6.271e-02, 1.003e-02, -1.544e-02, 4.810e-03, 6.860e-02, -2.462e-02, -6.332e-02, -6.534e-02, 8.899e-02, 2.976e-02));
	r += mul(s2_6, M4(-3.050e-02, -1.263e-02, -1.012e-01, -3.166e-02, -7.449e-02, -2.503e-02, -5.525e-02, 3.306e-02, -3.886e-02, 5.543e-02, 5.382e-02, -3.963e-02, 3.195e-02, 3.792e-03, -2.265e-02, -2.995e-02));
	r += mul(s2_7, M4(-9.550e-02, 8.093e-03, -6.212e-02, 1.538e-02, -4.168e-04, 4.310e-03, -1.145e-04, 1.177e-01, -2.583e-02, 1.046e-01, -5.829e-03, 1.304e-01, 3.170e-02, -6.325e-02, 3.423e-02, 6.609e-03));
	r += mul(s2_8, M4(4.738e-02, 2.854e-02, -7.887e-02, 4.673e-02, 8.907e-03, 1.670e-02, 2.104e-02, -5.520e-02, 3.884e-02, 1.973e-02, 1.202e-01, 7.833e-02, -7.837e-02, -4.010e-02, 8.031e-02, -9.000e-02));
	// s3_*: -max(-l1, 0) taps
	r += mul(s3_0, M4(2.679e-02, 2.173e-02, -1.323e-02, -1.968e-03, 1.052e-02, 2.645e-02, -4.180e-02, 3.171e-02, -2.261e-02, -2.448e-02, -5.866e-02, -7.578e-02, 7.423e-02, 5.530e-02, -1.174e-02, 1.807e-02));
	r += mul(s3_1, M4(-5.659e-02, -4.152e-02, -6.892e-02, -7.556e-02, -1.425e-01, -7.938e-02, 3.291e-02, -3.197e-02, 6.805e-02, 8.159e-02, -9.808e-02, -1.154e-01, -5.595e-02, -8.041e-02, -1.891e-02, -1.454e-01));
	r += mul(s3_2, M4(-1.225e-03, -7.471e-03, 4.640e-03, -2.132e-02, -1.187e-01, -3.307e-02, -3.342e-02, 5.745e-02, -4.317e-03, 6.259e-02, -9.434e-03, 8.730e-02, -2.601e-02, -3.185e-02, -5.734e-02, 6.062e-02));
	r += mul(s3_3, M4(8.051e-03, 5.024e-02, -7.025e-02, 8.818e-02, -8.869e-02, -1.515e-02, 5.252e-03, -2.860e-02, -6.840e-02, -2.742e-03, 8.916e-02, 2.543e-02, 1.025e-01, -2.034e-02, -5.761e-03, 3.742e-01));
	r += mul(s3_4, M4(-4.240e-02, 7.146e-02, -4.326e-02, 8.698e-02, -1.976e-02, -1.082e-01, -2.198e-02, -1.269e-01, 6.265e-03, 7.006e-02, -1.549e-02, -1.263e-01, -1.393e-01, -1.977e-01, 4.258e-01, 8.264e-02));
	r += mul(s3_5, M4(-4.435e-02, 2.545e-02, 1.863e-02, 2.360e-02, -3.587e-02, -8.683e-02, 8.091e-02, 1.216e-02, -1.017e-02, 2.783e-02, -2.242e-02, -4.877e-02, 3.016e-02, -4.084e-02, 2.683e-02, 3.136e-02));
	r += mul(s3_6, M4(4.558e-02, -3.461e-02, 1.245e-02, 6.048e-02, -3.341e-03, -4.436e-02, 1.241e-02, 5.533e-02, 8.177e-03, -4.103e-03, 3.365e-02, 7.956e-02, -7.129e-02, 5.349e-02, -4.337e-02, -1.018e-01));
	r += mul(s3_7, M4(-3.148e-03, -2.321e-02, 3.585e-02, -1.823e-02, -1.738e-02, 3.308e-02, 9.017e-02, 3.342e-04, -5.252e-03, -2.761e-02, 1.347e-01, 2.909e-02, -3.061e-03, 2.617e-02, -1.475e-02, -3.956e-02));
	r += mul(s3_8, M4(9.334e-03, -1.697e-02, -5.276e-02, 2.803e-02, 3.719e-03, -2.596e-03, -3.970e-02, 2.540e-02, -1.326e-01, -3.069e-02, 4.041e-02, -2.985e-02, -5.079e-02, 6.418e-02, -2.325e-02, -7.249e-02));
	// s4_*: max(l2, 0) taps
	r += mul(s4_0, M4(3.710e-02, 3.287e-02, 4.836e-02, -9.585e-02, 1.885e-03, 5.685e-03, -5.507e-03, 4.869e-02, 8.140e-02, 2.910e-02, -7.700e-02, 8.265e-02, -8.802e-03, 7.170e-02, -8.877e-04, -2.661e-02));
	r += mul(s4_1, M4(-1.652e-02, 7.513e-04, 3.076e-02, 5.516e-02, -2.761e-02, 6.551e-03, 1.027e-01, -4.674e-02, -5.690e-03, 5.166e-03, -3.712e-02, -1.844e-02, -3.894e-02, -3.135e-02, -6.120e-02, 1.714e-02));
	r += mul(s4_2, M4(6.301e-02, -5.956e-03, -3.980e-03, -2.500e-02, 4.266e-02, 1.491e-01, -6.553e-02, 1.103e-01, -3.315e-02, -1.231e-02, -3.530e-02, -6.407e-03, 3.088e-02, 4.581e-02, 6.168e-02, 5.876e-02));
	r += mul(s4_3, M4(-1.942e-02, -8.815e-03, -6.423e-02, 1.349e-02, -9.786e-02, -5.757e-02, -2.002e-02, 1.992e-01, 3.463e-02, 5.042e-02, 2.547e-03, -2.162e-02, -7.019e-02, -1.184e-01, 6.112e-02, -1.267e-01));
	r += mul(s4_4, M4(2.089e-03, 1.035e-02, -1.179e-01, 6.884e-02, 1.921e-01, -1.789e-02, 1.577e-01, -2.155e-01, 3.626e-03, 1.062e-02, 1.924e-02, 9.510e-02, -9.622e-02, -1.313e-01, -3.070e-02, 1.933e-01));
	r += mul(s4_5, M4(7.287e-02, 1.362e-01, 3.051e-02, -7.536e-02, -8.768e-02, 6.265e-02, -8.248e-02, -5.200e-02, 2.649e-02, -7.968e-02, 1.080e-02, -6.782e-03, -6.966e-02, -3.352e-02, 2.188e-02, -3.046e-03));
	r += mul(s4_6, M4(-4.444e-03, -1.998e-02, -3.328e-02, 4.993e-02, 1.821e-02, -1.305e-02, -1.285e-02, 1.196e-01, -1.910e-02, 1.556e-02, -1.920e-02, 4.315e-03, 5.933e-02, 1.134e-03, 3.927e-03, -1.362e-02));
	r += mul(s4_7, M4(-3.740e-02, -4.676e-02, 6.161e-02, 7.081e-03, -1.382e-01, 1.377e-01, -5.139e-02, -1.040e-01, -7.295e-04, -2.772e-02, 2.037e-02, 3.765e-02, 2.740e-02, -3.209e-02, -1.239e-02, -3.002e-03));
	r += mul(s4_8, M4(-8.065e-02, 8.158e-03, -6.351e-02, 3.085e-02, 8.517e-02, -4.434e-02, -9.817e-02, -1.627e-03, 1.648e-02, 8.428e-03, 1.192e-02, -3.798e-02, 5.931e-02, -2.690e-02, 5.350e-02, 2.766e-02));
	// s5_*: -max(-l2, 0) taps
	r += mul(s5_0, M4(-1.622e-02, -4.403e-02, 1.023e-01, -1.139e-01, 5.014e-02, 2.008e-02, 2.745e-02, 2.676e-02, 7.143e-02, 2.604e-03, -1.732e-01, 2.093e-02, 7.864e-02, 5.946e-02, -5.021e-02, -2.883e-02));
	r += mul(s5_1, M4(3.377e-02, 3.455e-02, 1.131e-01, -1.224e-01, -1.474e-02, -1.274e-02, -7.299e-02, 2.685e-02, -1.012e-02, 8.374e-02, 1.117e-01, 1.403e-01, -2.833e-02, -1.101e-02, -1.028e-01, 1.124e-01));
	r += mul(s5_2, M4(-1.305e-01, -9.027e-02, -1.216e-01, 2.585e-02, 5.500e-02, 4.169e-02, 3.386e-02, -4.850e-03, 1.853e-02, -9.479e-03, 1.559e-01, -5.464e-02, -4.298e-02, -1.684e-02, 4.651e-02, 2.252e-02));
	r += mul(s5_3, M4(-2.488e-02, 1.727e-02, 3.289e-02, -1.119e-01, -7.352e-02, 6.214e-03, 5.950e-02, -5.322e-02, -2.526e-03, -2.031e-02, -1.997e-01, 2.193e-01, 1.698e-01, 9.388e-02, -1.760e-02, -2.799e-01));
	r += mul(s5_4, M4(-1.644e-01, -6.465e-02, -1.964e-01, -2.100e-01, -8.515e-03, 8.435e-04, 2.077e-02, 6.934e-02, -1.016e-02, 1.320e-01, -4.857e-01, -1.243e-01, 2.594e-02, 5.574e-02, -2.013e-01, -6.372e-02));
	r += mul(s5_5, M4(6.039e-02, 1.160e-02, -8.932e-02, -9.111e-03, -3.383e-02, 1.090e-02, 7.543e-02, -7.096e-02, -1.671e-01, -2.288e-01, -1.212e-01, 4.297e-02, -7.059e-02, 4.491e-02, 3.937e-02, 5.454e-02));
	r += mul(s5_6, M4(2.136e-02, -8.988e-03, -3.129e-02, 1.085e-02, 4.960e-02, -2.167e-03, 2.436e-03, -5.589e-02, -3.687e-02, 2.597e-02, 5.500e-02, 7.337e-02, -2.889e-02, 1.762e-01, 9.740e-02, -2.120e-01));
	r += mul(s5_7, M4(-5.648e-02, -9.203e-02, 1.907e-02, -7.378e-02, -1.303e-02, 3.863e-03, -1.161e-01, 2.245e-02, 6.718e-02, 4.304e-02, 2.344e-01, 2.262e-02, 8.896e-02, 6.554e-02, -2.300e-01, 7.788e-02));
	r += mul(s5_8, M4(-3.322e-02, 3.952e-02, -6.377e-02, 1.787e-02, 1.770e-02, -1.739e-03, 5.596e-03, 1.804e-03, 1.023e-01, 2.178e-02, -4.965e-02, -1.213e-02, 5.301e-02, -1.652e-02, 4.560e-02, 3.575e-02));
	// s6_*: max(l3, 0) taps
	r += mul(s6_0, M4(5.305e-02, 6.024e-03, -6.012e-03, -2.101e-02, 9.004e-04, 1.515e-02, -3.313e-02, 6.079e-02, -2.658e-02, -4.520e-02, -7.564e-03, -1.143e-01, 9.037e-02, 8.192e-03, -3.790e-02, 1.233e-01));
	r += mul(s6_1, M4(-4.037e-02, 2.179e-02, -7.962e-02, 9.745e-02, -5.554e-02, -3.406e-02, -3.068e-03, -5.949e-02, 6.269e-02, 5.716e-02, 5.162e-02, 3.283e-02, 3.814e-02, -1.724e-02, -1.610e-02, -3.306e-02));
	r += mul(s6_2, M4(2.850e-02, 1.388e-02, -1.091e-02, -1.767e-03, 4.596e-04, 7.681e-04, -4.901e-02, 3.987e-02, -6.414e-03, -2.162e-02, 2.892e-02, -6.097e-03, -2.390e-02, -3.975e-02, -3.758e-02, 3.565e-02));
	r += mul(s6_3, M4(-1.030e-01, 5.867e-02, 6.311e-02, 4.378e-02, -7.515e-02, 7.870e-04, 2.617e-02, 1.581e-02, 2.060e-02, -2.820e-02, -6.539e-03, -2.060e-01, 1.867e-02, 7.975e-02, 7.304e-02, -1.851e-01));
	r += mul(s6_4, M4(1.318e-01, 6.028e-02, 8.039e-02, -4.800e-02, -1.777e-01, -2.444e-01, 7.054e-02, -2.082e-02, 1.185e-01, 1.637e-01, -1.121e-01, -2.441e-02, 2.106e-01, 4.146e-02, -6.769e-02, 6.979e-02));
	r += mul(s6_5, M4(1.051e-02, 2.402e-02, 4.806e-02, -3.359e-02, -6.737e-02, -3.636e-02, 4.301e-02, 1.530e-03, 1.313e-01, 1.824e-02, -8.806e-02, -7.708e-02, 9.100e-02, 3.534e-02, 2.935e-02, 3.096e-02));
	r += mul(s6_6, M4(3.856e-02, -3.014e-03, 3.394e-02, 1.071e-01, -4.078e-02, -6.640e-02, -2.350e-02, 1.959e-02, 9.128e-03, -4.047e-02, -3.454e-02, -8.068e-02, -3.010e-02, 6.882e-02, 5.409e-02, -7.349e-02));
	r += mul(s6_7, M4(6.851e-02, 8.326e-02, 1.533e-01, -6.287e-02, 1.728e-02, -1.714e-01, -5.740e-02, 4.809e-03, 2.828e-02, 3.169e-02, -6.673e-02, 3.754e-02, -2.514e-02, 3.858e-02, 6.254e-02, -7.638e-02));
	r += mul(s6_8, M4(-5.590e-03, 5.953e-02, 7.725e-02, -2.560e-03, -1.225e-01, -2.320e-02, 4.039e-02, -3.092e-02, 3.539e-02, 9.669e-03, -8.232e-02, -3.003e-02, 1.670e-02, -2.627e-02, 1.353e-02, -1.655e-02));
	// s7_*: -max(-l3, 0) taps
	r += mul(s7_0, M4(-6.490e-02, 5.115e-03, -1.528e-02, -1.145e-01, 7.564e-02, 7.405e-02, -5.443e-02, 6.745e-03, -6.547e-02, -4.020e-02, -4.225e-02, 3.839e-02, 1.049e-01, 4.631e-02, -5.378e-02, 1.743e-01));
	r += mul(s7_1, M4(3.424e-04, 1.829e-02, 9.501e-03, 5.903e-02, 1.090e-01, 1.012e-01, -6.213e-03, -4.232e-02, -3.044e-02, -3.550e-03, 6.156e-02, -1.484e-03, 6.939e-03, -8.635e-03, 2.082e-03, -3.243e-02));
	r += mul(s7_2, M4(7.190e-02, 9.364e-02, 2.862e-02, -8.397e-02, 6.550e-02, 6.722e-02, -6.371e-02, -4.278e-03, 4.129e-02, 6.908e-03, -5.093e-02, 8.225e-04, -3.228e-02, 1.587e-02, 1.322e-02, 9.009e-02));
	r += mul(s7_3, M4(-1.729e-02, 7.476e-02, 6.151e-02, 3.103e-02, 8.877e-02, 6.905e-02, -2.906e-02, -9.292e-02, -1.081e-01, -6.498e-02, 8.411e-02, -3.789e-02, -2.383e-02, 5.354e-02, -1.833e-03, -4.389e-03));
	r += mul(s7_4, M4(8.625e-02, 5.764e-02, -2.618e-01, -3.980e-02, -6.684e-02, -1.350e-02, -2.388e-01, 1.900e-01, 6.187e-02, 2.144e-02, -3.389e-03, -9.097e-02, 1.580e-01, 1.538e-01, 2.038e-02, -6.656e-02));
	r += mul(s7_5, M4(-1.117e-01, 1.565e-02, -5.238e-02, -8.835e-02, 6.472e-03, 8.414e-02, 3.239e-02, -4.135e-02, 1.121e-02, -7.688e-03, -1.144e-01, 1.978e-02, 5.818e-02, 6.246e-02, -3.547e-02, 4.597e-02));
	r += mul(s7_6, M4(2.220e-02, 7.870e-03, 2.765e-02, 2.444e-03, -7.059e-02, 6.972e-02, -5.966e-02, 1.448e-01, 5.399e-02, -1.218e-01, -5.895e-02, 3.625e-02, -4.502e-02, 6.365e-02, 2.878e-02, 4.715e-02));
	r += mul(s7_7, M4(-7.888e-02, 1.038e-01, -3.999e-02, 2.142e-01, 7.834e-02, 1.839e-02, 9.709e-02, 2.613e-02, -3.665e-02, -1.361e-03, 3.381e-02, -3.310e-02, -3.747e-02, 6.834e-02, 3.315e-02, 8.918e-03));
	r += mul(s7_8, M4(-3.474e-02, 6.471e-02, 8.885e-02, -4.717e-02, 3.719e-02, 9.219e-02, -4.642e-02, -9.449e-03, -2.453e-02, -2.280e-02, -6.700e-02, -5.099e-03, -2.730e-02, 1.369e-02, 4.512e-02, 2.629e-02));
	// constant per-channel offset, added after all taps
	r += V4(2.436e-02, 1.957e-02, -1.232e-02, -3.296e-02);
	return r;
}

// Pass 8, fourth output group: Pass8 stores this function's result in t7.
//
// Arguments are 8 groups (s0..s7) of 9 taps each. As Pass8 builds them, the tap
// suffix _0.._8 walks the 3x3 neighbourhood row by row from offset (-1,-1) to
// (1,1); even groups (s0, s2, s4, s6) hold max(v, 0) and odd groups
// (s1, s3, s5, s7) hold -max(-v, 0) of the l0, l1, l2, l3 samples respectively.
//
// Every tap is multiplied by its own constant 4x4 matrix (the tap is the first
// argument of mul), the 72 products are accumulated into r in the order
// written, and a constant V4 is added last.
// NOTE(review): the constants look like generated network weights and a bias —
// confirm against the generator before changing any of them by hand. The
// accumulation happens in V4 (min16float4), so reordering the statements may
// change rounding.
V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) {
	V4 r = 0.0;
	// s0_*: max(l0, 0) taps
	r += mul(s0_0, M4(1.208e-01, -1.226e-01, 4.139e-02, 1.672e-02, 3.747e-02, 1.641e-02, -1.400e-02, 2.603e-03, -9.280e-02, 1.165e-01, 6.712e-02, 5.747e-02, 5.795e-03, -1.884e-02, -5.806e-02, -2.103e-02));
	r += mul(s0_1, M4(-4.448e-02, -1.759e-01, 4.656e-02, -5.958e-02, -7.425e-02, 4.919e-02, -8.946e-02, 3.773e-02, 1.568e-01, 1.065e-01, -4.453e-02, -7.229e-02, 3.335e-03, -1.265e-02, -3.742e-02, -5.298e-02));
	r += mul(s0_2, M4(9.436e-03, -1.831e-02, 4.140e-03, 6.075e-02, 3.335e-02, 3.694e-02, -2.059e-02, 3.985e-02, -3.482e-02, -2.645e-02, -6.472e-02, -5.205e-03, -1.363e-02, -1.129e-01, 6.678e-03, 1.812e-02));
	r += mul(s0_3, M4(-1.682e-02, 3.792e-02, -9.146e-02, -8.048e-03, 1.158e-01, 6.700e-04, 2.752e-02, -5.915e-03, -6.619e-02, -2.281e-01, -1.466e-01, 1.194e-01, -7.796e-02, 2.379e-01, 4.111e-03, -6.715e-02));
	r += mul(s0_4, M4(-2.327e-01, 8.179e-02, -2.227e-01, -1.154e-01, -3.701e-02, 2.974e-02, 8.689e-02, -9.778e-02, 7.231e-02, -8.271e-02, -7.702e-02, -6.086e-02, -7.075e-02, 3.773e-02, 1.167e-01, 1.593e-02));
	r += mul(s0_5, M4(2.426e-02, 4.114e-02, -7.095e-02, 1.081e-01, 2.005e-03, 2.834e-02, -4.480e-02, -1.039e-03, -3.527e-02, 9.050e-02, 8.530e-02, -2.006e-02, 2.647e-02, 4.315e-02, 4.213e-02, 6.488e-02));
	r += mul(s0_6, M4(-3.340e-02, 4.409e-02, 2.409e-04, 3.593e-03, -1.914e-03, 5.568e-02, -1.601e-02, 6.657e-02, -3.951e-02, 1.302e-02, 8.837e-02, 6.599e-02, 5.774e-02, -9.646e-02, -4.925e-02, -7.544e-02));
	r += mul(s0_7, M4(-4.754e-02, 3.320e-02, -6.046e-02, -5.000e-02, 1.162e-01, -6.466e-03, 1.049e-01, -4.923e-02, 4.806e-02, -2.365e-03, 4.935e-02, -1.513e-02, 6.068e-02, -4.087e-02, -2.004e-02, 1.045e-01));
	r += mul(s0_8, M4(4.572e-02, 2.684e-02, 4.193e-02, -6.155e-03, -1.725e-02, 1.555e-02, 1.902e-02, -7.173e-02, 3.296e-02, -2.906e-02, -3.089e-02, 1.108e-01, -2.672e-02, -1.714e-03, -2.541e-02, 1.468e-02));
	// s1_*: -max(-l0, 0) taps
	r += mul(s1_0, M4(1.692e-01, -3.109e-01, 7.963e-02, 3.339e-02, 4.335e-02, -8.582e-03, 9.552e-03, -2.264e-02, -7.118e-02, 2.155e-02, 5.057e-02, 3.908e-03, -2.734e-02, 7.640e-03, -4.017e-02, 1.679e-02));
	r += mul(s1_1, M4(-4.408e-02, -2.848e-01, 9.724e-02, -6.205e-02, -6.897e-03, -1.005e-01, 7.328e-02, -2.755e-02, -3.623e-02, 8.837e-03, -1.560e-02, -7.135e-02, 1.371e-02, 1.046e-01, 1.830e-02, -2.500e-02));
	r += mul(s1_2, M4(-4.164e-02, -5.760e-02, -4.867e-03, -4.731e-02, -1.110e-02, -3.070e-02, 2.912e-02, 3.289e-02, 3.885e-02, -5.508e-04, -1.053e-02, 1.754e-02, 5.379e-03, -5.888e-02, 3.521e-02, -2.847e-02));
	r += mul(s1_3, M4(1.379e-02, 1.928e-01, -9.303e-03, 5.900e-02, -1.323e-01, 8.078e-02, -5.612e-02, -5.716e-02, 3.289e-02, -9.726e-02, -8.349e-02, 5.944e-02, -6.001e-02, 6.552e-03, 2.450e-03, -3.166e-02));
	r += mul(s1_4, M4(-1.411e-01, -1.940e-02, -3.377e-01, -9.430e-02, -2.508e-02, 1.420e-01, -2.662e-02, 1.240e-01, -9.893e-02, 1.008e-01, -1.202e-01, -4.663e-02, -8.621e-02, -6.152e-02, -1.013e-01, -4.004e-03));
	r += mul(s1_5, M4(1.475e-02, 5.437e-02, -1.313e-02, 8.481e-02, 6.013e-02, 1.428e-02, 2.697e-03, 7.665e-02, 2.210e-02, -4.551e-02, -6.520e-02, 3.908e-03, 2.881e-02, 2.892e-02, 9.411e-02, 1.113e-02));
	r += mul(s1_6, M4(1.295e-02, 2.119e-02, 3.595e-02, -1.883e-02, 5.922e-02, 1.354e-01, 1.468e-02, 1.992e-02, 8.332e-02, 2.735e-02, -5.336e-02, 7.069e-02, -2.745e-02, -4.790e-02, -7.620e-02, 2.050e-02));
	r += mul(s1_7, M4(-1.151e-01, 1.238e-02, -3.719e-03, -8.327e-03, -1.265e-01, 4.812e-02, 9.510e-03, 2.710e-02, 1.868e-02, -3.206e-02, -1.689e-02, 8.033e-02, 4.113e-02, 8.544e-02, -2.066e-03, 2.793e-02));
	r += mul(s1_8, M4(-2.543e-02, 2.565e-02, 5.130e-02, 3.102e-03, 5.037e-02, 4.359e-02, -5.993e-02, 1.153e-01, 4.809e-03, 4.918e-02, 3.962e-02, -4.677e-03, 1.491e-02, -5.397e-02, -2.091e-02, 1.154e-02));
	// s2_*: max(l1, 0) taps
	r += mul(s2_0, M4(-4.696e-02, -6.299e-02, -2.885e-02, -3.337e-02, 5.491e-02, -7.292e-02, -2.948e-02, -5.428e-02, -4.811e-02, 1.687e-01, 9.580e-03, -3.189e-02, 1.525e-01, -1.680e-03, 2.752e-02, -5.007e-04));
	r += mul(s2_1, M4(6.137e-02, 8.930e-02, 1.935e-02, -9.878e-02, 6.470e-03, -1.180e-03, 4.439e-02, -4.159e-02, 7.039e-02, 6.277e-02, -4.220e-02, -3.506e-02, -7.495e-02, -4.569e-02, 9.915e-02, -4.819e-02));
	r += mul(s2_2, M4(-6.690e-03, -4.709e-02, -8.617e-03, 1.128e-02, -1.234e-02, -5.296e-02, -1.959e-02, 8.562e-03, 2.983e-02, 6.469e-02, -8.118e-03, 9.698e-03, 1.594e-02, 1.233e-01, 7.226e-02, 4.709e-02));
	r += mul(s2_3, M4(1.362e-01, 9.432e-02, 6.378e-02, -2.612e-02, 4.053e-02, -3.164e-02, 2.733e-02, -2.000e-02, 1.917e-01, 3.272e-02, -4.118e-02, -2.870e-02, 1.899e-01, 1.436e-02, 6.496e-02, -2.292e-02));
	r += mul(s2_4, M4(-1.569e-02, 7.891e-03, 1.097e-01, -1.259e-01, 5.660e-03, -2.804e-02, -1.121e-01, 4.181e-02, 1.050e-01, -3.083e-02, -4.846e-02, -8.452e-02, 1.462e-02, -8.250e-02, -1.173e-01, -9.593e-03));
	r += mul(s2_5, M4(-1.959e-02, 4.876e-03, 6.666e-02, 2.711e-02, 5.521e-02, 2.238e-02, 2.469e-02, 1.929e-01, -3.808e-02, -1.214e-01, -4.719e-02, 1.663e-01, -5.399e-02, -1.247e-01, -1.763e-01, -4.044e-02));
	r += mul(s2_6, M4(1.661e-01, -7.206e-02, 4.163e-02, -7.267e-02, 3.635e-02, -3.221e-02, 3.350e-03, -6.100e-03, 2.232e-02, -4.944e-03, -3.431e-02, -1.230e-01, -5.037e-02, 7.514e-02, 2.936e-02, 3.774e-03));
	r += mul(s2_7, M4(-2.536e-03, -2.618e-02, -1.153e-02, -5.133e-02, -5.048e-02, -7.482e-03, -9.806e-03, -2.532e-02, -1.631e-02, 1.052e-03, 6.459e-02, 1.764e-02, -3.458e-03, 1.575e-02, 2.013e-02, 1.126e-01));
	r += mul(s2_8, M4(-1.950e-03, 1.616e-02, 4.400e-02, -2.765e-03, -2.407e-02, 2.714e-02, 6.637e-02, 5.078e-02, -2.704e-02, 6.811e-02, -6.171e-02, -6.063e-02, -3.555e-03, -7.526e-03, -1.073e-02, -6.472e-02));
	// s3_*: -max(-l1, 0) taps
	r += mul(s3_0, M4(-1.586e-02, -8.472e-02, 1.757e-02, -7.384e-04, 7.426e-03, -6.145e-02, 1.518e-02, -5.079e-02, 3.692e-02, -2.280e-02, 1.228e-02, 2.641e-02, -2.579e-02, -2.550e-02, -9.544e-03, 2.705e-03));
	r += mul(s3_1, M4(-3.033e-02, 1.983e-02, -6.241e-03, -6.516e-02, -4.897e-02, -1.499e-01, -2.616e-02, 5.815e-02, 7.285e-02, 1.058e-01, 8.509e-02, 5.656e-02, -7.548e-02, -1.785e-01, 3.110e-02, -1.290e-02));
	r += mul(s3_2, M4(-2.506e-03, -8.837e-02, -4.282e-02, -2.442e-02, -1.142e-02, -1.472e-01, 8.187e-02, -6.503e-03, 4.318e-03, 1.718e-02, 3.119e-02, 4.592e-02, 4.852e-02, 6.621e-02, 6.957e-02, 8.130e-02));
	r += mul(s3_3, M4(7.670e-03, 4.436e-02, 3.912e-02, -2.809e-02, 3.055e-02, -1.178e-01, -4.016e-02, -1.266e-02, 2.756e-02, -1.929e-02, -2.997e-02, -3.582e-02, -9.364e-02, -8.080e-02, 1.828e-02, 2.897e-02));
	r += mul(s3_4, M4(-8.633e-02, -6.935e-02, -7.481e-03, -1.387e-01, -4.836e-02, -1.039e-01, -5.183e-02, -1.802e-02, 4.349e-02, -1.398e-01, -1.713e-01, -7.101e-02, -4.267e-02, -3.547e-02, -2.141e-01, -3.583e-01));
	r += mul(s3_5, M4(-6.121e-03, 2.361e-02, -1.575e-02, 7.977e-02, 4.023e-02, 6.795e-02, -1.890e-02, 6.154e-02, 3.224e-03, -1.176e-02, 4.541e-02, 7.858e-02, -5.268e-02, -3.705e-02, -1.409e-01, 9.165e-02));
	r += mul(s3_6, M4(-2.116e-02, 4.334e-02, -2.392e-03, -3.574e-02, -6.518e-03, -3.482e-02, 6.002e-03, -2.301e-02, -7.438e-02, -2.872e-02, -6.940e-02, -2.751e-02, 1.971e-02, -1.097e-02, 6.442e-02, -5.565e-02));
	r += mul(s3_7, M4(-2.776e-02, -1.469e-03, -5.992e-02, -8.719e-02, -2.990e-02, 3.825e-02, 3.316e-03, -1.588e-02, -2.449e-04, 2.527e-02, -2.485e-02, -1.512e-01, 5.911e-02, 3.865e-02, 9.859e-03, 1.625e-01));
	r += mul(s3_8, M4(1.487e-02, -5.139e-03, 1.519e-02, 9.446e-03, -1.208e-02, -3.288e-02, 1.786e-03, 1.715e-02, 3.260e-02, -1.019e-01, -1.111e-01, -6.100e-02, -4.261e-02, -2.880e-02, 1.342e-02, -6.207e-02));
	// s4_*: max(l2, 0) taps
	r += mul(s4_0, M4(7.473e-02, -9.843e-02, 4.634e-02, 1.222e-02, -9.640e-02, 8.247e-02, -3.801e-02, -2.276e-02, 1.277e-02, 4.691e-03, 2.958e-02, -2.230e-02, 1.357e-02, 1.769e-02, -4.195e-02, -6.130e-02));
	r += mul(s4_1, M4(4.292e-02, 5.470e-02, 3.183e-03, 9.491e-02, -5.083e-02, -3.304e-02, -9.519e-02, 5.875e-02, -4.293e-03, -5.284e-02, 2.780e-02, 3.798e-02, 1.284e-02, -2.329e-03, 1.198e-02, -5.990e-02));
	r += mul(s4_2, M4(-6.650e-02, 5.448e-02, -4.260e-02, -2.709e-02, 8.678e-02, 3.785e-02, 7.341e-02, -4.835e-02, -1.063e-02, -7.336e-02, 2.946e-03, -6.587e-02, -3.151e-02, 3.142e-02, -3.179e-02, 3.592e-02));
	r += mul(s4_3, M4(1.646e-02, 1.071e-01, -8.085e-02, 9.750e-03, 2.637e-02, -5.760e-02, -2.088e-02, -1.460e-01, 7.848e-03, -5.850e-02, -1.927e-03, -1.073e-02, -7.960e-02, -1.548e-01, -2.802e-02, -8.254e-03));
	r += mul(s4_4, M4(1.430e-01, -1.003e-01, -1.136e-01, 1.973e-01, -2.317e-01, 9.932e-02, 6.234e-02, -1.388e-02, 6.027e-02, 7.720e-03, 2.609e-02, -2.970e-02, -5.399e-03, -3.966e-02, -8.715e-02, 8.388e-02));
	r += mul(s4_5, M4(-5.722e-02, 2.848e-02, 6.717e-02, -1.891e-01, 1.238e-01, 1.638e-02, -9.690e-02, 2.956e-02, 2.124e-02, -4.084e-02, 8.300e-03, -6.430e-02, -5.404e-02, -3.029e-03, -4.792e-02, 1.314e-01));
	r += mul(s4_6, M4(-5.015e-02, -9.406e-02, 2.852e-02, -5.992e-03, 8.162e-02, -4.082e-02, -1.592e-03, 4.615e-02, -7.159e-02, 2.490e-02, 1.560e-02, -4.330e-02, -4.604e-02, 6.942e-02, -1.232e-02, 1.700e-03));
	r += mul(s4_7, M4(6.375e-02, 6.614e-02, -1.595e-02, 5.002e-02, -5.885e-02, -6.411e-02, 7.694e-02, -2.888e-03, -4.576e-03, -6.373e-03, -5.908e-02, -6.341e-03, 7.994e-03, -2.970e-02, -2.836e-03, 1.464e-01));
	r += mul(s4_8, M4(-6.851e-02, -9.533e-02, 1.039e-02, -7.268e-02, 1.300e-01, -2.387e-02, -4.770e-02, 1.225e-02, 7.557e-04, 1.826e-02, 2.401e-02, 2.455e-03, 9.505e-03, 1.583e-02, -7.808e-03, -4.432e-02));
	// s5_*: -max(-l2, 0) taps
	r += mul(s5_0, M4(1.032e-02, -1.248e-01, -4.259e-02, -2.107e-02, -2.476e-02, -2.230e-02, 3.155e-03, -6.801e-03, -6.227e-02, 1.033e-01, 1.546e-02, -2.175e-03, -1.698e-02, 4.505e-02, 1.032e-01, -1.746e-02));
	r += mul(s5_1, M4(7.643e-02, 4.087e-02, -5.026e-02, 9.843e-02, 5.734e-02, -1.803e-02, -3.746e-02, -4.964e-02, 1.173e-01, 1.584e-02, -1.999e-01, -7.213e-02, 3.227e-03, -5.543e-02, -8.192e-02, -2.449e-02));
	r += mul(s5_2, M4(1.835e-02, 4.458e-03, 4.713e-02, 1.193e-02, -7.368e-03, -2.718e-02, -7.838e-02, 1.944e-02, -1.046e-02, 3.976e-02, -1.163e-01, -8.234e-02, -7.764e-02, -5.386e-03, -3.642e-02, -4.625e-03));
	r += mul(s5_3, M4(-4.249e-02, -4.110e-02, -9.450e-02, -3.841e-02, 4.038e-02, -5.472e-02, -4.634e-02, 5.570e-02, -2.153e-01, 1.431e-01, 1.478e-01, -1.139e-01, 6.344e-02, -6.702e-02, -6.537e-02, 4.383e-02));
	r += mul(s5_4, M4(2.586e-01, -1.978e-01, 2.032e-02, 4.913e-02, -8.419e-02, 4.377e-02, 3.250e-02, -5.745e-02, -1.402e-01, 5.409e-02, 2.995e-02, 1.815e-02, 5.592e-03, -2.316e-02, 1.624e-01, 2.534e-01));
	r += mul(s5_5, M4(-3.445e-02, -4.932e-02, -1.166e-02, 7.935e-02, -2.074e-02, 1.331e-02, -6.949e-02, -9.694e-02, 7.806e-02, -4.970e-02, 8.471e-02, -3.458e-03, -5.019e-02, 4.166e-03, -9.217e-02, -5.729e-02));
	r += mul(s5_6, M4(3.876e-02, 5.201e-04, 7.537e-02, -6.016e-02, 3.884e-02, 2.412e-02, 3.531e-02, -2.031e-02, -1.272e-01, -5.110e-02, -1.301e-01, -6.391e-03, -8.345e-03, 1.014e-01, 7.550e-02, -4.753e-03));
	r += mul(s5_7, M4(9.155e-02, 8.092e-02, -4.110e-02, 2.557e-02, 6.704e-02, -4.796e-02, 9.720e-03, 6.098e-02, -1.181e-01, -1.338e-02, 1.684e-02, -3.873e-02, -3.165e-02, 3.809e-02, -8.942e-02, 1.094e-01));
	r += mul(s5_8, M4(-1.930e-02, -1.798e-02, 5.993e-02, -3.256e-02, -2.257e-02, 5.359e-03, 7.253e-04, -4.748e-02, 9.689e-02, 2.043e-02, 1.876e-02, -7.769e-02, -1.006e-02, -1.476e-02, 1.630e-02, -3.216e-03));
	// s6_*: max(l3, 0) taps
	r += mul(s6_0, M4(-1.942e-02, -1.636e-02, -2.003e-03, -9.176e-03, 4.595e-02, -3.816e-02, -6.196e-03, 4.540e-02, 4.486e-02, -5.883e-02, 4.952e-02, -4.961e-02, -2.278e-02, 8.348e-02, 4.060e-02, 3.106e-02));
	r += mul(s6_1, M4(5.459e-02, -7.661e-03, 3.247e-02, -2.312e-02, 1.555e-03, 2.422e-03, 2.480e-02, 8.737e-03, 8.432e-03, 3.619e-02, 4.601e-02, 7.276e-02, 4.691e-02, -2.799e-02, -3.063e-02, -5.632e-02));
	r += mul(s6_2, M4(1.771e-02, -4.439e-02, 8.003e-03, 4.068e-02, 2.212e-02, 2.882e-02, 3.817e-02, 1.550e-02, 4.477e-02, 5.303e-03, 4.320e-02, 8.337e-02, -2.148e-02, -1.042e-02, -4.141e-02, -7.424e-03));
	r += mul(s6_3, M4(-1.046e-02, 1.348e-01, -7.347e-02, 3.864e-04, -2.987e-02, 1.721e-02, -1.031e-02, 1.365e-02, 1.127e-01, -1.271e-01, -2.893e-02, -2.252e-02, 7.782e-02, 6.553e-02, 1.611e-02, 7.165e-02));
	r += mul(s6_4, M4(-1.647e-01, 8.367e-03, -1.954e-01, 3.519e-02, -6.066e-02, -1.751e-01, 1.987e-02, 4.552e-02, -3.052e-02, 1.324e-01, 7.788e-02, 1.411e-01, 5.785e-02, 8.841e-03, 3.008e-02, -1.193e-02));
	r += mul(s6_5, M4(4.575e-02, 2.933e-02, -3.609e-02, -1.482e-02, -4.333e-02, -2.008e-02, -3.871e-02, -5.532e-02, 9.783e-02, 6.445e-02, 1.499e-01, 1.683e-01, -2.356e-02, 6.141e-02, 2.904e-03, 2.112e-02));
	r += mul(s6_6, M4(-3.586e-02, 7.513e-03, 2.905e-02, -2.824e-02, 2.945e-02, -4.356e-02, 8.347e-04, 1.698e-03, 8.889e-02, -3.999e-02, -4.600e-02, -8.680e-03, -9.044e-03, -5.725e-03, 2.258e-03, 3.439e-02));
	r += mul(s6_7, M4(-5.036e-02, 1.410e-01, 2.360e-03, -7.009e-02, 4.161e-02, -9.975e-02, -1.061e-01, -1.230e-02, -4.415e-02, -6.260e-02, 4.748e-02, 7.399e-02, 7.400e-02, 5.209e-02, 1.495e-02, 1.961e-02));
	r += mul(s6_8, M4(3.887e-02, 2.861e-02, -5.926e-02, 3.014e-02, -4.256e-02, -3.001e-02, -5.039e-03, 1.458e-04, 6.518e-02, -2.086e-02, 5.172e-02, 3.376e-02, -2.826e-02, -1.956e-02, -6.630e-02, 2.044e-02));
	// s7_*: -max(-l3, 0) taps
	r += mul(s7_0, M4(2.581e-02, -8.798e-02, -3.776e-02, 5.633e-02, -4.144e-02, 1.434e-01, 5.310e-02, 4.405e-02, 8.046e-04, -7.999e-02, 3.022e-02, -4.280e-02, -5.826e-02, 9.874e-02, 3.675e-02, 1.498e-02));
	r += mul(s7_1, M4(4.093e-02, 1.469e-01, 3.709e-02, -2.254e-02, 5.227e-02, 1.961e-02, 2.499e-02, 4.711e-02, -4.289e-02, -3.350e-02, -1.520e-02, -1.267e-02, 1.336e-02, -1.772e-01, -7.237e-03, -6.089e-02));
	r += mul(s7_2, M4(-3.291e-03, 4.629e-03, -2.715e-02, -4.765e-03, 4.204e-03, -7.442e-03, 1.517e-02, -3.461e-03, 6.636e-02, 6.446e-02, 5.237e-02, -1.822e-02, 1.204e-02, 9.596e-03, 2.026e-02, -4.059e-02));
	r += mul(s7_3, M4(1.446e-01, 1.120e-01, 1.445e-02, 4.715e-02, -9.855e-02, 1.332e-01, 7.686e-02, 3.075e-02, 1.389e-01, -2.450e-02, -1.558e-01, 2.449e-02, -1.939e-02, -2.909e-02, 2.642e-02, 1.386e-02));
	r += mul(s7_4, M4(-3.889e-02, 1.283e-01, 1.610e-01, -1.688e-01, 3.061e-02, -1.769e-01, 2.392e-01, 8.607e-02, -5.087e-02, 9.072e-02, 3.085e-02, -4.719e-02, 7.219e-02, 1.239e-01, 8.664e-02, -4.961e-02));
	r += mul(s7_5, M4(7.244e-02, -2.825e-02, 7.402e-02, 3.481e-03, -3.425e-02, 4.283e-02, 7.261e-02, -8.555e-02, 1.067e-01, -8.323e-03, 1.513e-02, 7.220e-02, -5.575e-02, 1.445e-02, -1.231e-02, 8.109e-02));
	r += mul(s7_6, M4(-1.548e-02, -1.125e-01, 7.741e-02, -2.596e-02, 3.716e-02, -4.756e-02, -3.039e-02, 3.515e-02, 1.175e-02, -5.446e-03, -3.847e-02, -9.783e-03, -3.526e-02, 6.083e-03, -2.624e-02, 3.069e-02));
	r += mul(s7_7, M4(1.090e-01, -8.652e-02, 1.857e-01, 4.552e-02, -1.905e-02, 4.213e-02, -4.890e-03, -1.129e-01, -5.078e-02, -1.977e-02, 8.947e-02, 2.143e-02, 8.086e-03, -2.149e-02, 1.718e-02, -1.472e-02));
	r += mul(s7_8, M4(-1.970e-02, -4.491e-03, -2.376e-02, -7.501e-02, -1.957e-03, 6.665e-02, 1.631e-02, -9.853e-03, 6.115e-02, 5.283e-03, -1.900e-02, -1.069e-02, -2.595e-02, -2.954e-02, -1.801e-02, 1.378e-02));
	// constant per-channel offset, added after all taps
	r += V4(9.662e-03, 1.842e-02, 1.390e-02, 1.935e-02);
	return r;
}

// Entry point of pass 8. Each invocation handles one pixel: it gathers a 3x3
// neighbourhood from each of the four tap macros l0..l3, splits every sample
// into a non-negative part (even groups s0/s2/s4/s6) and a non-positive part
// (odd groups s1/s3/s5/s7), and writes f0..f3 of those 72 taps to t4..t7.
//
// blockStart - pixel origin of the block this thread belongs to.
// tid        - thread id; only tid.x is used, remapped through Rmp8x8.
//
// `pos` and `pt` must keep these exact names: the O() macro behind l0..l3
// refers to them directly.
// NOTE(review): the l0..l3 definitions for this pass are above this function;
// presumably they read t0..t3 — confirm against the pass header.
void Pass8(uint2 blockStart, uint3 tid) {
	float2 pt = float2(GetInputPt());
	uint2 gxy = blockStart + Rmp8x8(tid.x);
	uint2 bounds = GetInputSize();
	// Threads that fall outside the input have nothing to write.
	if (any(gxy >= bounds)) {
		return;
	}
	float2 pos = pt * (gxy + 0.5);

	// l0 taps: take the negative part first, then clamp the sample in place.
	V4 s0_0 = l0(-1.0, -1.0);
	V4 s1_0 = -max(-s0_0, 0.0);
	s0_0 = max(s0_0, 0.0);
	V4 s0_1 = l0(0.0, -1.0);
	V4 s1_1 = -max(-s0_1, 0.0);
	s0_1 = max(s0_1, 0.0);
	V4 s0_2 = l0(1.0, -1.0);
	V4 s1_2 = -max(-s0_2, 0.0);
	s0_2 = max(s0_2, 0.0);
	V4 s0_3 = l0(-1.0, 0.0);
	V4 s1_3 = -max(-s0_3, 0.0);
	s0_3 = max(s0_3, 0.0);
	V4 s0_4 = l0(0.0, 0.0);
	V4 s1_4 = -max(-s0_4, 0.0);
	s0_4 = max(s0_4, 0.0);
	V4 s0_5 = l0(1.0, 0.0);
	V4 s1_5 = -max(-s0_5, 0.0);
	s0_5 = max(s0_5, 0.0);
	V4 s0_6 = l0(-1.0, 1.0);
	V4 s1_6 = -max(-s0_6, 0.0);
	s0_6 = max(s0_6, 0.0);
	V4 s0_7 = l0(0.0, 1.0);
	V4 s1_7 = -max(-s0_7, 0.0);
	s0_7 = max(s0_7, 0.0);
	V4 s0_8 = l0(1.0, 1.0);
	V4 s1_8 = -max(-s0_8, 0.0);
	s0_8 = max(s0_8, 0.0);

	// l1 taps
	V4 s2_0 = l1(-1.0, -1.0);
	V4 s3_0 = -max(-s2_0, 0.0);
	s2_0 = max(s2_0, 0.0);
	V4 s2_1 = l1(0.0, -1.0);
	V4 s3_1 = -max(-s2_1, 0.0);
	s2_1 = max(s2_1, 0.0);
	V4 s2_2 = l1(1.0, -1.0);
	V4 s3_2 = -max(-s2_2, 0.0);
	s2_2 = max(s2_2, 0.0);
	V4 s2_3 = l1(-1.0, 0.0);
	V4 s3_3 = -max(-s2_3, 0.0);
	s2_3 = max(s2_3, 0.0);
	V4 s2_4 = l1(0.0, 0.0);
	V4 s3_4 = -max(-s2_4, 0.0);
	s2_4 = max(s2_4, 0.0);
	V4 s2_5 = l1(1.0, 0.0);
	V4 s3_5 = -max(-s2_5, 0.0);
	s2_5 = max(s2_5, 0.0);
	V4 s2_6 = l1(-1.0, 1.0);
	V4 s3_6 = -max(-s2_6, 0.0);
	s2_6 = max(s2_6, 0.0);
	V4 s2_7 = l1(0.0, 1.0);
	V4 s3_7 = -max(-s2_7, 0.0);
	s2_7 = max(s2_7, 0.0);
	V4 s2_8 = l1(1.0, 1.0);
	V4 s3_8 = -max(-s2_8, 0.0);
	s2_8 = max(s2_8, 0.0);

	// l2 taps
	V4 s4_0 = l2(-1.0, -1.0);
	V4 s5_0 = -max(-s4_0, 0.0);
	s4_0 = max(s4_0, 0.0);
	V4 s4_1 = l2(0.0, -1.0);
	V4 s5_1 = -max(-s4_1, 0.0);
	s4_1 = max(s4_1, 0.0);
	V4 s4_2 = l2(1.0, -1.0);
	V4 s5_2 = -max(-s4_2, 0.0);
	s4_2 = max(s4_2, 0.0);
	V4 s4_3 = l2(-1.0, 0.0);
	V4 s5_3 = -max(-s4_3, 0.0);
	s4_3 = max(s4_3, 0.0);
	V4 s4_4 = l2(0.0, 0.0);
	V4 s5_4 = -max(-s4_4, 0.0);
	s4_4 = max(s4_4, 0.0);
	V4 s4_5 = l2(1.0, 0.0);
	V4 s5_5 = -max(-s4_5, 0.0);
	s4_5 = max(s4_5, 0.0);
	V4 s4_6 = l2(-1.0, 1.0);
	V4 s5_6 = -max(-s4_6, 0.0);
	s4_6 = max(s4_6, 0.0);
	V4 s4_7 = l2(0.0, 1.0);
	V4 s5_7 = -max(-s4_7, 0.0);
	s4_7 = max(s4_7, 0.0);
	V4 s4_8 = l2(1.0, 1.0);
	V4 s5_8 = -max(-s4_8, 0.0);
	s4_8 = max(s4_8, 0.0);

	// l3 taps
	V4 s6_0 = l3(-1.0, -1.0);
	V4 s7_0 = -max(-s6_0, 0.0);
	s6_0 = max(s6_0, 0.0);
	V4 s6_1 = l3(0.0, -1.0);
	V4 s7_1 = -max(-s6_1, 0.0);
	s6_1 = max(s6_1, 0.0);
	V4 s6_2 = l3(1.0, -1.0);
	V4 s7_2 = -max(-s6_2, 0.0);
	s6_2 = max(s6_2, 0.0);
	V4 s6_3 = l3(-1.0, 0.0);
	V4 s7_3 = -max(-s6_3, 0.0);
	s6_3 = max(s6_3, 0.0);
	V4 s6_4 = l3(0.0, 0.0);
	V4 s7_4 = -max(-s6_4, 0.0);
	s6_4 = max(s6_4, 0.0);
	V4 s6_5 = l3(1.0, 0.0);
	V4 s7_5 = -max(-s6_5, 0.0);
	s6_5 = max(s6_5, 0.0);
	V4 s6_6 = l3(-1.0, 1.0);
	V4 s7_6 = -max(-s6_6, 0.0);
	s6_6 = max(s6_6, 0.0);
	V4 s6_7 = l3(0.0, 1.0);
	V4 s7_7 = -max(-s6_7, 0.0);
	s6_7 = max(s6_7, 0.0);
	V4 s6_8 = l3(1.0, 1.0);
	V4 s7_8 = -max(-s6_8, 0.0);
	s6_8 = max(s6_8, 0.0);

	// Every output texture receives the same 72 taps run through its own f*.
	t4[gxy] = f0(
		s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8,
		s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8,
		s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8,
		s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8,
		s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8,
		s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8,
		s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8,
		s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8);
	t5[gxy] = f1(
		s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8,
		s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8,
		s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8,
		s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8,
		s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8,
		s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8,
		s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8,
		s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8);
	t6[gxy] = f2(
		s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8,
		s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8,
		s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8,
		s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8,
		s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8,
		s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8,
		s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8,
		s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8);
	t7[gxy] = f3(
		s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8,
		s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8,
		s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8,
		s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8,
		s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8,
		s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8,
		s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8,
		s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8);
}

//!PASS 9
//!DESC conv8
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN t4, t5, t6, t7
//!OUT t0, t1, t2, t3

// Input taps for this pass: lN(x, y) reads texture t4..t7 through the O()
// macro from the COMMON section (point sampler SP, mip level 0) at
// pos + float2(x, y) * pt and converts the sample to V4 (min16float4).
// `pos` and `pt` are not defined here; they must be in scope at the use site.
#define l0(x, y) V4(O(t4, float2(x, y)))
#define l1(x, y) V4(O(t5, float2(x, y)))
#define l2(x, y) V4(O(t6, float2(x, y)))
#define l3(x, y) V4(O(t7, float2(x, y)))

// f0: computes one 4-component output vector of this pass.
// Each of the 72 inputs sG_T (G = 0..7, T = 0..8) is passed as the left
// operand of mul() (i.e. treated as a row vector) against its own constant
// 4x4 matrix; the products are summed in the listed order into r, a constant
// 4-component offset is added, and the total is returned.
// V4/M4 are the min16float4/min16float4x4 aliases from the COMMON section.
// NOTE(review): G presumably selects an input feature group and T one of the
// 3x3 neighbourhood taps; the caller is outside this view - confirm there.
V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) {
	// Accumulator for the weighted sum; starts at zero.
	V4 r = 0.0;
	r += mul(s0_0, M4(8.887e-02, 3.201e-02, 3.272e-03, 3.765e-02, 5.024e-02, 2.837e-02, 8.306e-03, 2.390e-02, -1.173e-01, -2.534e-02, -1.399e-01, 8.948e-02, -5.215e-03, -1.560e-02, 8.428e-03, -2.487e-02));
	r += mul(s0_1, M4(5.189e-02, -5.796e-02, -4.014e-02, 3.733e-02, 3.837e-02, 9.472e-02, -6.379e-03, 1.350e-02, -1.656e-01, 5.211e-02, 1.362e-01, -6.236e-02, -2.758e-02, -6.192e-02, -1.576e-02, -2.115e-02));
	r += mul(s0_2, M4(5.693e-02, 4.895e-02, -5.442e-02, -3.899e-03, -2.244e-02, -3.409e-02, 2.633e-02, 2.265e-02, -9.917e-02, -2.025e-01, 2.870e-02, -5.216e-02, 5.458e-02, 5.259e-02, 7.653e-03, -7.861e-03));
	r += mul(s0_3, M4(3.048e-02, -2.534e-03, 1.112e-02, -9.310e-02, -4.621e-02, -1.901e-02, 7.749e-02, 6.284e-02, -2.183e-01, 7.311e-03, 8.476e-02, -5.162e-02, 4.214e-02, -2.302e-02, -5.391e-02, 5.794e-02));
	r += mul(s0_4, M4(-9.152e-03, -1.432e-01, 2.792e-02, 8.631e-02, 5.127e-02, 6.313e-03, 5.178e-02, -2.321e-02, -4.154e-01, -2.910e-01, 5.044e-01, 1.183e-01, -3.116e-02, 8.087e-02, 5.677e-02, -3.368e-02));
	r += mul(s0_5, M4(1.282e-02, 2.131e-02, 5.910e-03, 3.835e-02, 4.310e-02, 6.678e-02, -3.528e-03, -5.441e-03, -1.461e-01, 2.870e-01, 8.298e-02, -1.276e-01, 1.755e-02, 4.309e-03, 2.989e-02, -2.564e-02));
	r += mul(s0_6, M4(7.705e-02, -1.516e-02, 2.567e-02, 1.081e-01, 2.617e-02, 2.163e-02, -4.291e-02, -9.064e-03, 8.263e-02, 2.934e-03, -3.025e-02, 1.798e-02, -9.562e-02, 2.937e-02, 3.906e-02, -2.487e-02));
	r += mul(s0_7, M4(9.005e-02, 3.765e-02, -3.861e-02, 1.184e-02, -2.468e-02, -2.764e-03, 6.975e-03, 3.962e-02, 1.575e-02, 2.822e-02, -2.852e-02, 1.132e-01, 2.799e-02, -4.942e-05, 6.324e-02, -4.779e-02));
	r += mul(s0_8, M4(-9.121e-03, -2.050e-02, -5.023e-03, 3.821e-02, -2.644e-02, 1.712e-02, 4.536e-02, -2.812e-02, 1.159e-01, -1.993e-02, -1.738e-01, -4.798e-02, 1.387e-03, -1.129e-02, -4.604e-02, 3.013e-02));
	r += mul(s1_0, M4(-2.237e-02, 5.579e-02, -2.246e-02, 3.570e-02, -2.149e-02, 8.068e-02, -1.792e-02, 3.440e-02, -1.433e-02, 1.115e-02, 1.515e-02, 2.813e-02, -1.558e-02, -3.391e-02, -1.400e-02, -4.613e-02));
	r += mul(s1_1, M4(-1.341e-02, 2.247e-02, 3.174e-02, -1.056e-02, 4.711e-02, 2.507e-01, 5.819e-02, -2.294e-02, 5.427e-03, 2.027e-02, 1.049e-03, -1.555e-02, -4.490e-02, -3.285e-02, -5.561e-02, 2.131e-02));
	r += mul(s1_2, M4(3.709e-02, -1.982e-02, 4.372e-03, -3.034e-03, 3.273e-02, 3.586e-02, -3.369e-02, 9.327e-04, -3.915e-02, -2.741e-02, 3.030e-03, -9.380e-05, -5.189e-02, 2.277e-02, 4.675e-02, 5.198e-03));
	r += mul(s1_3, M4(-7.693e-02, 8.631e-05, 2.404e-02, -5.598e-02, 1.511e-03, 4.933e-02, 5.708e-02, -8.107e-02, 3.209e-02, -1.408e-02, 3.269e-03, 2.666e-02, -1.425e-01, -1.133e-01, -1.107e-02, -1.065e-01));
	r += mul(s1_4, M4(-3.071e-02, -9.263e-02, -7.140e-03, -1.791e-03, -2.620e-01, 2.204e-01, 1.028e-01, -6.621e-02, -2.582e-02, -5.728e-02, 3.437e-02, -1.372e-02, 5.936e-03, -3.954e-01, -4.912e-02, -4.338e-02));
	r += mul(s1_5, M4(5.105e-03, -1.751e-02, 2.083e-02, -5.066e-02, 1.203e-01, -2.110e-02, 2.213e-02, -5.408e-02, 4.408e-02, 1.305e-02, 5.215e-04, -2.305e-02, -1.895e-02, 2.121e-01, 7.568e-03, -1.781e-02));
	r += mul(s1_6, M4(-1.679e-02, -4.701e-02, 3.984e-03, 1.883e-02, 1.751e-02, -8.799e-03, -5.917e-02, -7.541e-02, 3.519e-02, 1.518e-02, -2.381e-02, -4.177e-02, -6.473e-02, -2.278e-02, -1.700e-02, -2.191e-02));
	r += mul(s1_7, M4(2.106e-02, -4.757e-02, -1.853e-02, 2.467e-02, 4.212e-02, 5.024e-02, -9.230e-02, -3.573e-02, -4.607e-03, -2.600e-02, -1.520e-03, 8.444e-02, -9.903e-02, -2.004e-01, 2.285e-01, 1.495e-02));
	r += mul(s1_8, M4(-4.115e-02, -5.467e-02, -4.223e-03, -8.214e-03, -4.322e-03, 5.092e-02, 2.288e-02, -2.948e-03, 6.988e-03, 4.056e-02, 7.262e-03, -2.476e-02, -5.517e-02, 5.640e-02, -1.188e-02, -1.767e-02));
	r += mul(s2_0, M4(-1.913e-02, 2.134e-03, 1.936e-02, -2.429e-02, -5.421e-02, -2.326e-02, -5.509e-02, -3.796e-02, -3.348e-02, 3.749e-02, 1.311e-02, 2.045e-02, 3.166e-03, 1.811e-02, -1.005e-02, 1.696e-02));
	r += mul(s2_1, M4(-9.150e-02, -1.023e-01, 7.368e-03, 2.306e-02, 7.247e-02, -8.859e-02, -2.536e-02, -6.319e-03, -2.311e-02, -1.426e-04, 4.821e-02, -6.262e-02, -1.403e-03, -1.258e-02, 2.554e-02, -1.763e-02));
	r += mul(s2_2, M4(2.814e-02, 3.237e-02, -6.889e-03, 3.033e-02, 3.767e-02, 2.573e-02, -1.026e-01, -4.388e-02, -5.742e-03, -5.933e-02, 2.558e-02, -7.563e-03, -4.052e-02, -4.367e-02, 7.445e-03, -2.313e-02));
	r += mul(s2_3, M4(1.518e-03, 5.084e-02, -2.629e-02, -7.922e-04, -7.820e-02, -4.543e-02, -3.833e-02, -3.059e-02, 2.764e-02, 2.558e-02, -1.724e-02, 1.145e-01, -1.394e-02, -7.350e-03, 2.198e-02, -1.572e-02));
	r += mul(s2_4, M4(-4.070e-02, -2.989e-02, 8.852e-02, -3.323e-02, -1.839e-01, 2.319e-01, 1.441e-01, -6.286e-02, 4.204e-02, 1.371e-01, -7.721e-02, 3.164e-02, -3.207e-02, -1.033e-01, -1.103e-02, 1.647e-02));
	r += mul(s2_5, M4(-3.430e-02, -5.789e-02, 5.859e-02, 5.950e-02, -4.161e-03, -1.968e-01, -5.103e-02, -2.885e-02, -4.215e-02, -4.254e-02, -9.712e-03, -6.761e-03, 1.942e-02, -1.867e-02, -3.283e-02, 1.285e-02));
	r += mul(s2_6, M4(9.657e-03, -2.888e-02, -2.190e-02, 4.982e-02, 1.119e-01, -1.187e-02, 2.497e-03, -2.191e-02, -4.578e-02, -1.121e-02, -3.918e-02, -5.578e-02, 3.260e-02, 1.351e-02, -1.695e-02, -1.870e-02));
	r += mul(s2_7, M4(-6.146e-03, -5.304e-03, -2.515e-02, 6.409e-03, -1.061e-01, -6.602e-02, 1.038e-01, 7.992e-02, -1.125e-02, -1.140e-01, 2.139e-02, 1.562e-02, 3.380e-02, -4.087e-02, -3.406e-02, 2.572e-02));
	r += mul(s2_8, M4(-4.541e-03, -1.534e-02, -1.960e-02, 4.713e-03, 3.191e-02, 3.243e-02, -3.525e-03, 8.842e-02, 2.320e-02, -5.254e-02, 5.576e-03, -6.195e-04, -1.623e-02, 9.936e-03, 2.434e-02, -1.706e-02));
	r += mul(s3_0, M4(-6.865e-02, -2.470e-03, -7.014e-02, 9.169e-02, 8.304e-03, 1.561e-02, -3.308e-02, 6.586e-03, 5.683e-02, -3.107e-02, 1.599e-02, 1.966e-02, 8.197e-03, -8.098e-02, 3.804e-02, 2.126e-03));
	r += mul(s3_1, M4(2.214e-02, 1.383e-01, 1.963e-01, 5.929e-02, 1.310e-02, 3.765e-02, 5.032e-04, 1.223e-02, 5.856e-03, -6.911e-02, -1.580e-02, -4.967e-02, 7.396e-02, 8.123e-02, -2.005e-02, 2.053e-02));
	r += mul(s3_2, M4(2.141e-02, 4.417e-02, -2.593e-02, 1.296e-02, -4.732e-03, 2.229e-02, -1.534e-02, -8.790e-03, -6.795e-03, -3.142e-02, -1.915e-02, -3.137e-03, -8.709e-02, -1.506e-01, 1.318e-01, 3.611e-02));
	r += mul(s3_3, M4(-3.602e-02, 1.065e-02, 6.388e-02, 6.761e-02, -8.897e-02, 1.601e-02, 1.340e-02, -9.509e-03, 1.011e-01, -9.129e-03, 1.162e-02, 7.398e-02, 2.940e-01, -2.547e-01, -4.550e-02, -1.246e-02));
	r += mul(s3_4, M4(3.596e-02, 3.200e-01, -1.405e-01, -2.549e-01, -2.897e-02, 5.599e-02, 1.866e-02, -4.516e-02, 1.024e-02, 2.166e-03, -1.358e-02, 1.027e-01, 2.121e-01, 1.760e-01, -2.349e-01, 5.819e-02));
	r += mul(s3_5, M4(-9.025e-02, -1.524e-01, 2.781e-01, -3.188e-02, 1.139e-02, -1.866e-02, 1.844e-02, 2.610e-02, -4.711e-03, -3.051e-03, -5.856e-02, 2.500e-02, 6.084e-02, -1.612e-02, -6.092e-02, -4.106e-02));
	r += mul(s3_6, M4(-4.519e-02, 6.426e-02, -5.022e-02, -6.991e-02, -6.298e-03, 3.535e-02, 4.076e-02, -1.325e-02, 4.969e-02, 1.608e-02, -5.519e-03, -1.369e-02, -7.312e-02, 6.747e-02, 6.830e-02, 3.065e-03));
	r += mul(s3_7, M4(-4.999e-02, -1.162e-02, 1.204e-01, 2.076e-01, 8.110e-02, 4.946e-02, -5.974e-02, -8.557e-02, 1.209e-02, 2.526e-02, 3.859e-02, -5.364e-02, 1.788e-02, 5.056e-02, 2.686e-02, 7.050e-02));
	r += mul(s3_8, M4(7.296e-02, -7.147e-02, -1.277e-01, 1.150e-01, -4.047e-02, -2.960e-02, 3.931e-02, 1.868e-02, 5.949e-02, 3.640e-02, 8.097e-03, 1.534e-02, 2.215e-02, -2.979e-02, 3.785e-02, -4.045e-02));
	r += mul(s4_0, M4(2.419e-02, -3.334e-02, -6.239e-02, 1.849e-02, -1.288e-01, 3.855e-03, 8.238e-03, -3.097e-03, -2.360e-02, 3.227e-03, -2.656e-02, -7.296e-02, -2.975e-02, 1.910e-02, 3.925e-02, -1.792e-02));
	r += mul(s4_1, M4(-2.377e-02, 1.642e-02, -1.290e-02, 5.035e-02, -8.455e-03, -9.846e-02, 2.072e-02, -1.479e-01, 1.688e-01, 8.426e-02, 7.203e-02, 6.841e-02, -5.795e-02, 1.265e-01, 1.150e-02, 6.977e-02));
	r += mul(s4_2, M4(-1.809e-02, -9.698e-03, 1.761e-02, 1.454e-02, -8.195e-03, -6.953e-02, 4.992e-02, -4.566e-02, -7.422e-02, -1.824e-02, -4.712e-02, -5.545e-03, -2.661e-02, 4.049e-02, 2.054e-02, 3.071e-02));
	r += mul(s4_3, M4(1.207e-02, -4.860e-02, 1.842e-02, 4.550e-02, 1.145e-02, -3.759e-02, -5.589e-02, -1.074e-01, 9.698e-02, 7.776e-02, -1.022e-02, 1.271e-01, -4.734e-02, 2.670e-02, -3.132e-02, -2.664e-02));
	r += mul(s4_4, M4(5.663e-02, -4.446e-02, -6.395e-02, -6.242e-02, -6.249e-02, -1.642e-01, 2.852e-02, 1.742e-01, 1.478e-01, 3.719e-01, -7.803e-02, -3.928e-01, 1.613e-01, 2.810e-01, -6.044e-02, 3.681e-02));
	r += mul(s4_5, M4(1.708e-02, 2.751e-02, 6.601e-03, 3.119e-02, -7.969e-03, 1.282e-01, -3.515e-02, -1.187e-01, 6.335e-02, -1.502e-01, -4.561e-02, -2.970e-02, -1.501e-01, 5.000e-01, -2.194e-01, 4.952e-02));
	r += mul(s4_6, M4(8.606e-02, -5.005e-02, 2.406e-02, 1.606e-01, 4.853e-02, -2.827e-02, 6.015e-02, 9.987e-02, -1.819e-02, 4.406e-02, -9.072e-03, -8.452e-03, 4.792e-04, -1.547e-02, 2.470e-02, 3.105e-03));
	r += mul(s4_7, M4(8.391e-03, 9.081e-02, -9.720e-03, -1.825e-02, 8.768e-02, -1.014e-02, -1.528e-01, -1.106e-01, 7.172e-03, 1.264e-01, -2.313e-02, -3.496e-02, -2.137e-01, -3.030e-02, 9.430e-02, -1.309e-01));
	r += mul(s4_8, M4(-2.614e-02, 4.468e-02, 4.065e-02, -2.869e-02, -1.297e-02, -2.339e-02, -2.540e-03, 1.588e-02, -4.478e-02, 3.770e-02, 3.070e-02, 7.073e-03, -4.713e-02, 8.982e-02, 1.057e-01, -1.787e-01));
	r += mul(s5_0, M4(1.929e-01, -6.049e-02, 1.261e-02, -7.127e-02, -1.489e-01, 1.863e-02, 4.513e-02, -4.118e-03, -5.224e-03, -2.984e-02, -3.773e-02, 6.289e-03, 4.134e-02, 1.385e-02, 8.470e-03, 1.358e-02));
	r += mul(s5_1, M4(1.664e-02, -1.110e-03, -2.606e-01, 1.177e-01, 5.526e-03, 6.164e-02, 1.086e-01, -9.018e-02, -1.285e-01, -8.475e-02, -7.029e-03, -4.614e-02, -1.016e-02, 6.228e-03, -6.354e-03, -1.944e-02));
	r += mul(s5_2, M4(2.205e-02, 1.012e-01, -6.228e-02, -2.201e-02, -3.456e-02, -3.355e-02, 2.056e-02, -2.969e-03, -1.091e-01, -1.215e-01, 5.271e-02, 3.107e-02, -1.952e-02, -3.789e-02, -4.389e-03, 1.075e-02));
	r += mul(s5_3, M4(1.921e-01, 1.543e-01, -1.211e-01, 3.928e-01, 4.351e-02, -2.524e-02, -6.480e-02, 1.052e-01, -2.238e-02, 2.424e-02, -3.844e-02, 9.200e-03, 4.739e-03, -9.607e-03, -1.322e-02, 3.699e-02));
	r += mul(s5_4, M4(-2.783e-01, 7.867e-02, -2.074e-01, 2.666e-01, 5.335e-02, 1.131e-01, -2.982e-01, 2.570e-02, 3.721e-02, 2.780e-02, -1.026e-01, -1.936e-01, -2.599e-04, 4.792e-02, 2.084e-02, -4.745e-02));
	r += mul(s5_5, M4(1.033e-01, 7.341e-02, -2.279e-02, -5.819e-02, -8.561e-02, 8.010e-03, -3.334e-03, -2.201e-02, 2.485e-02, -2.677e-02, -7.876e-02, 4.692e-02, -1.914e-03, -2.119e-02, 2.180e-02, 5.092e-02));
	r += mul(s5_6, M4(7.233e-03, -1.247e-01, -1.209e-02, -8.267e-02, 1.057e-03, -1.752e-02, 7.966e-02, -6.080e-03, 3.715e-02, -1.670e-03, -3.067e-03, 1.106e-01, -2.165e-02, 7.407e-04, -8.410e-03, 1.055e-03));
	r += mul(s5_7, M4(-3.666e-02, 1.788e-02, 1.756e-02, 9.302e-02, 7.233e-03, -7.879e-03, -1.701e-02, 6.506e-03, 6.389e-03, -4.760e-06, -2.765e-02, -1.567e-02, -2.391e-02, -4.882e-02, -3.183e-02, -1.139e-02));
	r += mul(s5_8, M4(1.248e-01, -5.499e-02, -1.877e-01, 3.818e-02, -1.905e-02, 5.370e-03, -2.512e-02, -2.418e-03, -4.736e-02, 2.345e-02, 1.898e-02, -5.234e-02, -7.952e-02, -5.534e-03, 6.866e-02, -1.501e-02));
	r += mul(s6_0, M4(3.349e-02, 3.433e-02, 2.367e-03, 1.707e-02, 2.834e-04, 2.968e-02, -1.011e-02, 3.636e-02, 6.394e-02, -4.141e-02, -7.244e-02, -8.106e-03, 2.991e-02, 8.409e-03, -2.334e-02, 2.244e-02));
	r += mul(s6_1, M4(3.531e-02, 4.867e-02, 5.204e-03, -1.661e-02, 1.252e-02, 3.131e-02, 3.468e-02, -1.069e-02, 2.216e-02, -5.971e-02, -2.087e-01, -1.454e-03, -8.230e-02, 1.256e-02, 3.144e-02, -6.300e-03));
	r += mul(s6_2, M4(1.198e-02, -1.093e-01, 2.009e-03, 5.850e-03, 4.185e-02, -4.072e-04, 1.377e-02, -2.387e-03, -6.416e-02, -3.365e-02, -7.202e-02, -2.289e-02, -9.483e-03, -1.216e-02, -3.992e-02, 1.798e-02));
	r += mul(s6_3, M4(-2.601e-02, 4.478e-02, 3.726e-02, 8.723e-02, -3.169e-02, -5.242e-03, 2.795e-02, -9.956e-03, -2.930e-02, -1.820e-03, -1.177e-02, -1.033e-01, 2.332e-02, -3.233e-02, 7.997e-03, 7.938e-02));
	r += mul(s6_4, M4(1.140e-02, -2.316e-02, -3.004e-02, -2.630e-02, 8.140e-03, -1.012e-01, -2.611e-03, -5.549e-02, 1.747e-01, 8.882e-02, -1.212e-01, 3.937e-02, 5.680e-02, -1.092e-02, -5.874e-02, 1.098e-01));
	r += mul(s6_5, M4(4.590e-02, 3.616e-02, -1.270e-01, -9.277e-03, 2.422e-02, -9.688e-02, -1.732e-02, -3.310e-02, -3.165e-02, 2.455e-02, -8.957e-02, 1.277e-02, -9.996e-03, 2.412e-02, 3.442e-02, -1.002e-02));
	r += mul(s6_6, M4(-1.275e-02, 5.449e-03, -4.332e-03, -1.923e-02, 6.185e-02, 2.942e-02, -2.970e-02, 1.438e-02, -5.858e-02, 1.987e-02, 5.388e-02, 1.758e-02, 1.038e-02, -2.942e-02, -6.372e-02, -5.622e-02));
	r += mul(s6_7, M4(-4.430e-02, 3.630e-02, 8.744e-03, -5.341e-02, 1.083e-01, 3.413e-02, -1.207e-01, 1.866e-02, -1.196e-02, 6.106e-02, 1.101e-01, 1.858e-03, 1.266e-02, -1.900e-02, -2.594e-02, 1.300e-02));
	r += mul(s6_8, M4(-7.216e-02, -3.953e-03, 5.978e-02, 2.634e-02, -2.210e-02, -3.100e-02, -3.031e-02, 9.694e-03, -3.416e-02, 6.459e-03, 2.769e-02, -5.562e-02, -1.805e-02, -3.524e-02, 1.197e-02, -2.079e-03));
	r += mul(s7_0, M4(2.432e-02, -3.764e-02, 2.184e-06, -9.107e-03, -3.065e-02, -1.918e-02, -2.148e-02, -5.258e-02, -3.428e-02, 4.043e-02, -7.553e-02, -2.058e-02, 3.945e-02, 3.997e-02, 4.444e-02, -1.918e-02));
	r += mul(s7_1, M4(1.699e-02, -1.853e-02, -1.586e-02, -3.566e-02, 9.624e-02, 1.364e-01, -3.323e-03, 6.777e-02, 5.119e-02, -5.110e-02, 2.388e-02, -5.701e-03, -1.320e-01, -2.138e-01, 4.052e-02, -1.073e-02));
	r += mul(s7_2, M4(3.618e-02, 7.331e-02, -2.008e-02, -4.520e-02, -3.762e-02, -2.573e-02, -4.067e-02, -6.379e-03, -3.185e-02, -1.700e-02, -2.503e-02, -1.310e-02, 4.268e-02, 8.023e-02, -2.566e-02, 1.200e-02));
	r += mul(s7_3, M4(5.186e-02, -1.451e-01, 1.294e-03, -8.585e-02, 1.538e-01, -8.961e-02, 4.282e-02, 1.006e-01, 1.072e-01, -1.157e-01, -1.704e-02, 1.783e-02, 7.231e-02, 3.331e-03, 2.005e-02, -3.466e-02));
	r += mul(s7_4, M4(8.486e-02, -2.125e-01, 3.064e-02, 3.166e-01, 5.850e-02, 1.479e-01, -9.927e-02, -3.233e-01, 3.735e-01, 6.033e-03, -2.300e-01, -2.835e-01, 1.770e-02, 1.745e-01, -1.580e-01, -6.637e-02));
	r += mul(s7_5, M4(-2.507e-02, 2.838e-03, 6.493e-02, 4.726e-02, -1.672e-01, -2.349e-01, 1.630e-01, 8.437e-02, 6.116e-02, 3.310e-02, -5.197e-02, 2.226e-02, 4.672e-03, -1.372e-01, 8.634e-03, -1.429e-02));
	r += mul(s7_6, M4(-3.649e-02, -1.269e-02, -1.024e-02, 2.659e-02, -1.143e-02, 1.632e-02, -2.783e-02, -1.192e-01, 5.262e-02, 3.661e-03, -4.617e-02, -2.578e-02, 1.772e-02, -1.441e-01, -5.866e-02, -8.424e-02));
	r += mul(s7_7, M4(-8.299e-02, -9.926e-02, 5.071e-02, -3.511e-02, -2.202e-01, -2.311e-01, 2.417e-01, 3.174e-01, 6.560e-03, 3.771e-02, -6.069e-02, 8.530e-02, 3.150e-02, -1.810e-03, 1.121e-02, -3.748e-02));
	r += mul(s7_8, M4(-3.565e-02, 4.143e-02, 1.022e-02, -8.715e-02, 5.984e-02, 1.291e-01, -1.059e-02, -2.838e-01, 3.367e-02, 1.852e-02, 1.254e-02, -5.066e-02, 5.227e-03, -4.538e-03, -1.702e-02, 6.824e-03));
	// Constant offset added once, after all 72 matrix products.
	r += V4(1.181e-03, 8.230e-03, -8.373e-04, -2.226e-04);
	return r;
}

// f1: computes one 4-component output vector of this pass.
// Each of the 72 inputs sG_T (G = 0..7, T = 0..8) is passed as the left
// operand of mul() (i.e. treated as a row vector) against its own constant
// 4x4 matrix; the products are summed in the listed order into r, a constant
// 4-component offset is added, and the total is returned.
// V4/M4 are the min16float4/min16float4x4 aliases from the COMMON section.
// NOTE(review): G presumably selects an input feature group and T one of the
// 3x3 neighbourhood taps; the caller is outside this view - confirm there.
V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) {
	// Accumulator for the weighted sum; starts at zero.
	V4 r = 0.0;
	r += mul(s0_0, M4(3.735e-02, 7.321e-03, -9.063e-02, 2.655e-02, 5.782e-03, -1.955e-02, 4.747e-03, -1.940e-02, 4.638e-02, -4.222e-03, 1.020e-01, -2.915e-02, -6.362e-02, -1.949e-02, 8.342e-03, 1.491e-02));
	r += mul(s0_1, M4(-4.903e-02, 9.009e-02, -4.578e-02, 5.669e-02, 5.086e-02, 3.943e-02, -1.008e-01, -1.949e-03, -1.187e-01, 1.094e-01, -1.198e-01, 8.811e-02, -2.927e-02, 1.096e-02, 4.834e-02, 1.965e-02));
	r += mul(s0_2, M4(1.657e-02, 5.933e-03, -2.361e-02, 7.328e-02, 3.851e-02, -8.436e-04, 4.338e-03, 2.002e-04, -3.099e-03, -4.612e-03, 6.618e-02, -5.134e-02, 4.999e-02, -1.884e-02, -9.779e-03, 4.197e-02));
	r += mul(s0_3, M4(-2.789e-02, -1.824e-02, -3.909e-02, 4.040e-02, 1.013e-02, 8.704e-02, 1.274e-02, -3.519e-02, -2.138e-01, 2.234e-02, -2.230e-01, 5.964e-02, 1.265e-01, -1.117e-02, 3.896e-02, -2.179e-02));
	r += mul(s0_4, M4(-3.369e-02, 3.132e-02, -2.603e-02, -1.647e-01, -7.495e-02, -7.839e-02, -3.332e-02, -9.295e-02, -4.065e-02, -1.203e-01, -7.278e-03, -4.638e-01, -6.658e-02, 7.366e-04, 3.104e-02, -1.348e-01));
	r += mul(s0_5, M4(-2.310e-02, 3.044e-02, -2.977e-02, 6.350e-02, 1.043e-02, 4.835e-03, -1.005e-03, 1.369e-02, 2.835e-02, -4.378e-02, -1.284e-02, 1.836e-01, -1.514e-01, 2.900e-04, -4.419e-03, 3.467e-02));
	r += mul(s0_6, M4(-1.773e-02, 8.370e-02, -4.369e-02, 4.831e-03, 6.303e-03, -2.615e-02, -4.635e-02, 1.748e-02, 1.000e-01, 5.347e-02, -2.080e-02, 8.705e-02, -4.790e-02, -2.085e-02, 2.236e-02, -7.095e-02));
	r += mul(s0_7, M4(8.422e-03, 4.460e-02, -4.504e-02, -5.108e-02, 1.772e-02, 2.164e-02, -4.921e-02, 9.575e-02, 2.302e-01, 6.677e-02, 9.069e-02, -2.652e-02, -6.605e-03, 2.440e-02, -6.203e-02, 7.330e-03));
	r += mul(s0_8, M4(5.511e-02, 1.542e-02, -5.059e-03, 3.171e-02, -6.442e-02, -5.679e-03, 1.145e-02, 2.220e-05, 1.217e-01, -7.622e-03, 7.413e-03, 7.513e-02, 5.875e-02, -7.754e-03, -1.073e-02, 6.121e-02));
	r += mul(s1_0, M4(2.168e-02, -4.579e-02, 4.918e-02, -1.727e-02, 4.170e-02, -1.246e-01, 5.060e-02, 8.042e-03, 2.092e-02, -7.061e-03, 2.682e-02, -4.390e-03, -5.984e-03, 2.385e-02, 3.160e-02, -2.543e-02));
	r += mul(s1_1, M4(-7.658e-02, 5.386e-02, -7.724e-03, -5.315e-02, 6.599e-02, 1.275e-01, -2.768e-02, 6.249e-02, 3.247e-02, -3.022e-03, 1.086e-02, -3.709e-03, 3.864e-02, -4.664e-02, 6.696e-02, 8.100e-03));
	r += mul(s1_2, M4(-6.044e-02, -1.413e-02, -5.048e-03, -3.623e-02, 3.267e-02, -2.345e-03, -2.573e-02, 2.255e-02, 4.310e-02, 2.143e-03, 1.195e-02, -1.254e-03, 3.588e-02, 5.696e-04, 3.540e-02, -5.437e-02));
	r += mul(s1_3, M4(5.584e-02, 2.195e-02, 1.284e-01, -2.934e-02, -1.995e-02, 1.069e-01, 1.716e-02, 3.259e-02, 2.022e-02, 6.430e-02, 1.950e-02, -1.204e-03, -8.247e-03, -1.034e-01, 4.017e-02, -1.014e-01));
	r += mul(s1_4, M4(7.738e-02, -7.787e-02, -8.559e-02, -3.832e-01, -1.601e-01, 1.303e-02, 1.333e-01, -5.981e-01, 4.252e-02, -3.160e-02, 3.228e-02, -3.681e-03, -1.285e-01, 2.940e-01, 2.559e-01, 1.676e-02));
	r += mul(s1_5, M4(-1.728e-02, -9.817e-03, 2.978e-02, -1.828e-01, 1.827e-02, -1.183e-02, -3.527e-02, -8.733e-03, 5.358e-02, 2.633e-02, 1.640e-02, -2.296e-02, 2.498e-02, -5.456e-03, 4.117e-02, 1.054e-01));
	r += mul(s1_6, M4(1.625e-02, 3.119e-02, 2.967e-02, -4.471e-02, 3.823e-02, -6.555e-02, -2.538e-02, 8.332e-02, 2.353e-02, -3.428e-03, 2.937e-02, 1.472e-02, 1.979e-01, 1.279e-01, 2.702e-02, 4.688e-02));
	r += mul(s1_7, M4(-2.596e-02, 1.838e-02, -1.123e-02, -1.558e-01, 7.660e-02, -4.095e-02, -2.568e-03, 1.184e-01, 5.229e-02, 2.339e-02, -1.010e-02, 1.139e-02, -6.036e-02, -1.079e-01, -7.627e-02, -1.255e-01));
	r += mul(s1_8, M4(2.202e-02, -6.740e-03, 4.657e-02, -1.208e-01, 1.782e-03, 1.630e-02, 1.916e-02, -2.809e-02, 1.417e-02, -7.265e-04, -5.384e-04, 5.627e-03, -1.229e-03, 1.043e-02, -2.705e-02, 1.526e-01));
	r += mul(s2_0, M4(2.447e-02, 1.729e-02, -2.765e-02, 5.449e-02, -4.221e-02, -8.931e-02, 3.146e-02, -2.128e-02, -2.070e-02, 1.432e-03, -3.099e-02, -1.587e-02, 6.266e-03, -3.164e-02, -3.437e-02, 2.344e-02));
	r += mul(s2_1, M4(-9.059e-02, 5.509e-02, 3.072e-02, 1.440e-02, 9.820e-03, -1.885e-02, -5.084e-02, -7.885e-02, -2.189e-02, -1.145e-01, 3.476e-02, -5.418e-02, -1.010e-01, -1.096e-03, -3.446e-03, -1.668e-02));
	r += mul(s2_2, M4(-9.562e-02, 5.623e-03, -1.658e-02, 3.662e-02, -2.377e-02, -3.407e-02, -3.480e-02, 7.729e-02, -4.953e-02, 1.609e-02, 1.767e-02, -1.126e-01, -2.488e-02, -3.236e-02, -3.311e-03, -1.805e-02));
	r += mul(s2_3, M4(1.759e-02, -2.190e-02, -5.458e-02, 2.657e-02, 3.511e-02, -2.803e-01, 4.074e-02, -8.826e-02, 3.244e-02, 8.148e-02, 4.088e-02, 4.509e-02, -9.224e-02, -2.484e-04, -4.308e-02, -1.777e-03));
	r += mul(s2_4, M4(9.104e-04, 3.797e-02, -5.060e-02, 2.746e-03, 2.471e-01, 1.048e-01, 1.037e-01, 3.233e-01, 1.347e-01, 1.900e-03, 1.886e-02, -3.936e-01, -3.162e-02, 7.359e-02, 9.111e-02, -3.810e-02));
	r += mul(s2_5, M4(1.120e-02, 3.946e-02, 3.958e-03, 3.405e-02, -3.474e-02, -3.862e-02, -7.786e-03, 1.777e-01, 1.561e-02, 5.411e-03, 5.232e-02, -2.836e-01, 1.983e-02, 3.910e-03, 4.503e-02, -2.982e-02));
	r += mul(s2_6, M4(2.452e-02, 3.821e-02, -3.523e-02, 1.539e-02, -4.772e-02, 1.718e-02, -3.666e-02, -6.527e-02, 1.994e-02, -6.181e-03, 5.485e-02, -4.438e-03, 4.088e-02, 3.997e-02, 1.727e-02, 5.171e-02));
	r += mul(s2_7, M4(6.000e-02, 3.510e-02, 3.557e-02, -8.980e-02, -5.613e-02, -4.479e-02, -1.678e-01, 5.538e-02, 5.004e-02, 1.512e-02, 2.275e-02, -1.108e-01, 1.488e-01, 2.930e-03, -1.023e-02, 3.367e-02));
	r += mul(s2_8, M4(6.663e-03, -1.386e-03, -1.365e-02, -2.420e-02, -5.310e-02, 2.358e-02, -1.795e-02, -2.243e-02, 1.510e-02, 4.362e-03, 8.571e-05, -9.362e-02, 5.626e-03, -1.368e-02, -1.471e-03, 3.142e-02));
	r += mul(s3_0, M4(-3.950e-02, -4.733e-02, 9.475e-02, -4.892e-02, 9.380e-02, -1.509e-02, 1.450e-02, -2.095e-02, -3.504e-02, 2.453e-02, -5.969e-02, 2.178e-03, 6.615e-02, 7.004e-02, 8.001e-02, -6.070e-02));
	r += mul(s3_1, M4(-1.676e-02, -1.485e-02, 2.666e-01, 8.033e-02, -4.579e-03, -3.812e-02, 3.769e-03, 3.362e-03, -9.428e-02, -3.817e-02, -3.272e-02, 1.008e-01, -3.819e-02, 7.791e-02, -7.914e-02, 2.262e-02));
	r += mul(s3_2, M4(-2.685e-02, 2.254e-02, 3.175e-02, -7.497e-02, -5.441e-03, 3.000e-03, -3.112e-03, 2.195e-02, 5.569e-03, 1.282e-02, 2.887e-02, 1.238e-02, -1.596e-01, 1.621e-02, 1.791e-02, -1.396e-01));
	r += mul(s3_3, M4(9.641e-02, -1.211e-02, 1.318e-01, -2.892e-02, 9.309e-02, -1.012e-01, 5.106e-02, -2.608e-02, 3.708e-02, 8.899e-03, -7.867e-03, 6.110e-02, -2.357e-01, 2.636e-01, -1.759e-01, 3.462e-02));
	r += mul(s3_4, M4(1.184e-01, -6.208e-02, -7.662e-02, -1.109e-01, 1.660e-01, 2.238e-02, 2.532e-02, -2.775e-02, 6.642e-02, 1.673e-01, -1.187e-02, -1.796e-01, 3.810e-02, -1.789e-01, 3.771e-02, 5.403e-01));
	r += mul(s3_5, M4(-1.026e-03, 3.177e-02, -7.086e-02, 1.776e-01, 2.638e-02, 3.480e-02, 1.754e-02, 5.252e-02, 5.514e-03, 6.224e-03, 4.988e-04, 1.556e-03, 7.209e-02, 2.107e-02, 3.573e-02, -2.720e-01));
	r += mul(s3_6, M4(-7.826e-02, -2.392e-02, 7.209e-02, -7.598e-02, -1.960e-02, -9.385e-03, 3.182e-02, -2.384e-02, -3.284e-03, 2.330e-02, 2.146e-02, 3.374e-02, -3.855e-02, -2.250e-02, -2.083e-02, 2.701e-02));
	r += mul(s3_7, M4(-1.451e-01, 4.434e-02, -8.629e-02, 1.044e-02, 1.889e-01, 2.243e-02, 3.330e-03, -2.253e-02, 7.287e-03, -1.563e-02, 2.876e-03, -1.321e-02, -3.684e-02, 8.823e-02, 2.742e-02, 4.708e-02));
	r += mul(s3_8, M4(-6.824e-02, 2.854e-02, -6.687e-03, -9.146e-02, 6.651e-02, -1.166e-02, 1.677e-02, -5.796e-02, 7.355e-03, 8.139e-03, -2.706e-02, 5.048e-02, 3.256e-02, -7.823e-03, 3.660e-03, -1.004e-01));
	r += mul(s4_0, M4(-9.431e-03, 5.889e-02, -2.126e-02, -1.830e-02, 2.106e-02, -9.595e-02, 7.357e-02, -6.164e-02, -3.097e-03, -7.961e-02, 1.315e-01, 2.196e-02, 1.916e-02, 5.432e-02, 9.130e-03, 1.871e-02));
	r += mul(s4_1, M4(1.210e-01, 3.681e-02, 2.575e-02, 5.345e-02, -6.667e-02, 2.085e-02, -5.452e-02, -1.316e-01, 8.277e-02, 1.974e-02, -4.569e-03, 6.749e-02, 4.601e-02, -6.241e-03, 4.080e-01, 9.546e-02));
	r += mul(s4_2, M4(6.069e-02, 1.337e-02, 3.668e-03, 4.510e-02, 3.752e-02, -6.577e-02, 6.636e-02, -6.562e-02, -2.238e-02, 2.513e-03, -3.745e-02, 1.332e-01, -1.584e-01, -1.608e-02, 1.324e-01, -1.189e-01));
	r += mul(s4_3, M4(-1.071e-01, 2.279e-02, -3.308e-02, -1.314e-03, 4.047e-03, 5.535e-02, 5.889e-02, 1.296e-02, 9.058e-02, 4.993e-02, 7.142e-02, 3.842e-02, -3.450e-02, -1.056e-01, 3.612e-02, 1.292e-02));
	r += mul(s4_4, M4(1.172e-02, -9.352e-03, 5.152e-02, -5.238e-03, -3.401e-02, 9.417e-03, -2.763e-02, 5.862e-02, 2.160e-01, -1.091e-01, 2.705e-02, -1.936e-01, -5.915e-02, 1.654e-01, 5.814e-01, -7.901e-02));
	r += mul(s4_5, M4(1.396e-01, 3.896e-02, 1.335e-04, -3.864e-02, -5.279e-02, -4.169e-02, -2.619e-02, 7.096e-02, 9.102e-02, -3.263e-02, 7.145e-03, -5.895e-02, 5.079e-02, 2.301e-01, 2.891e-01, -8.593e-02));
	r += mul(s4_6, M4(1.144e-02, 2.328e-02, -7.695e-02, -1.343e-03, 5.186e-02, 6.368e-02, -2.607e-02, -2.692e-02, -5.042e-02, -4.400e-02, 1.485e-02, 2.958e-03, 4.236e-02, 5.249e-03, -1.026e-01, 2.812e-02));
	r += mul(s4_7, M4(5.092e-02, -3.809e-03, 4.445e-02, 2.612e-02, 1.142e-01, -7.595e-02, 5.381e-02, 1.712e-02, -1.065e-01, -7.024e-03, 2.167e-02, -9.190e-02, 6.058e-02, -9.443e-02, 1.459e-01, -4.769e-02));
	r += mul(s4_8, M4(3.684e-04, -1.860e-02, -1.058e-02, 1.948e-02, 2.465e-03, -5.119e-03, 2.769e-02, 1.458e-02, -2.639e-02, 8.080e-03, -1.704e-02, -2.213e-02, 5.268e-02, -5.988e-02, 3.712e-02, 1.254e-01));
	r += mul(s5_0, M4(-2.496e-02, 8.809e-02, -1.426e-01, 6.392e-02, 3.160e-02, -6.681e-02, 8.480e-02, -7.819e-02, -2.165e-02, 2.542e-02, -1.916e-02, 2.132e-02, -3.296e-02, 1.161e-02, -1.487e-02, 2.013e-02));
	r += mul(s5_1, M4(2.416e-01, 1.343e-02, 4.903e-02, 6.341e-02, -9.003e-02, -7.302e-02, -1.000e-01, -5.732e-03, -2.697e-02, 5.232e-02, -8.220e-02, -3.580e-02, 9.402e-03, -1.418e-03, -1.429e-02, 7.107e-03));
	r += mul(s5_2, M4(1.023e-01, 2.851e-02, -1.219e-02, 3.114e-02, 9.954e-03, -2.428e-02, 4.689e-02, 8.756e-03, -1.618e-02, 1.071e-02, -1.172e-02, -8.114e-03, -3.601e-02, 3.402e-02, -7.213e-03, -1.546e-02));
	r += mul(s5_3, M4(2.819e-02, 3.272e-01, -3.521e-02, -1.169e-01, 1.235e-01, 1.380e-01, 1.391e-01, -1.713e-02, -4.166e-02, -1.108e-01, -5.924e-02, 6.759e-02, -4.242e-02, 1.073e-02, -3.557e-02, -3.206e-02));
	r += mul(s5_4, M4(-1.297e-03, -7.965e-02, 2.005e-01, 3.798e-01, 1.215e-01, -3.609e-02, -8.034e-02, 6.808e-02, 4.566e-02, -1.028e-02, 1.626e-02, 2.790e-02, -6.773e-02, -5.215e-02, -1.234e-02, 1.084e-02));
	r += mul(s5_5, M4(1.163e-01, 6.928e-02, -7.286e-02, -7.548e-02, -7.859e-02, -3.504e-02, 1.962e-02, -1.861e-02, 6.673e-02, 5.095e-03, 4.188e-02, 3.247e-02, 5.449e-02, -3.710e-02, -4.225e-03, -6.734e-03));
	r += mul(s5_6, M4(-6.793e-02, -8.408e-02, -1.155e-01, 6.691e-02, -6.385e-03, -1.279e-02, 7.386e-02, -2.486e-02, 2.807e-02, 7.187e-02, -6.588e-02, 2.863e-02, 3.158e-02, 4.317e-03, -1.664e-02, -9.957e-03));
	r += mul(s5_7, M4(4.247e-02, 7.270e-02, 1.199e-02, -2.256e-01, 3.120e-02, 3.151e-03, -1.541e-02, -9.396e-02, 2.435e-02, 9.246e-03, 8.851e-04, -8.081e-02, 3.958e-02, 1.086e-02, -9.265e-04, -3.038e-02));
	r += mul(s5_8, M4(4.742e-02, 5.148e-02, -3.171e-02, -1.373e-01, 5.415e-02, -9.496e-03, 1.975e-02, 3.439e-03, -5.139e-02, -2.697e-02, 1.210e-02, -1.295e-02, -3.452e-02, -2.794e-03, 1.383e-02, 1.658e-02));
	r += mul(s6_0, M4(-2.507e-02, -7.946e-04, 1.110e-02, -4.932e-04, 2.526e-02, 9.996e-03, 2.226e-02, -1.076e-02, -6.662e-03, -3.381e-02, -2.353e-02, 5.043e-03, 3.321e-03, 2.826e-02, 4.069e-02, 5.503e-03));
	r += mul(s6_1, M4(3.050e-02, -2.427e-03, -1.249e-01, -4.679e-03, 3.654e-03, -4.095e-02, -3.797e-03, 5.638e-03, 4.113e-03, 3.107e-02, -3.934e-02, -6.310e-02, -3.349e-02, 7.328e-02, 3.905e-02, 5.081e-02));
	r += mul(s6_2, M4(2.691e-02, -3.391e-02, -1.199e-02, 4.570e-03, -6.239e-02, 2.360e-03, -3.945e-02, -6.586e-03, -2.278e-02, 2.032e-04, 7.616e-03, 2.224e-02, 1.718e-03, 1.473e-02, 1.707e-02, -1.987e-02));
	r += mul(s6_3, M4(-4.025e-03, 2.814e-02, 6.370e-02, -3.235e-02, -1.529e-02, 3.723e-02, -1.565e-03, 1.399e-02, 7.093e-02, -7.519e-02, 4.052e-02, 1.021e-02, 4.772e-02, 3.930e-02, -1.189e-01, -1.362e-02));
	r += mul(s6_4, M4(-3.540e-02, 8.062e-02, -1.039e-01, 1.217e-02, 3.187e-03, 1.265e-02, -4.382e-02, -2.559e-02, 1.570e-01, -7.089e-02, 8.914e-03, 7.153e-02, 1.327e-01, -9.643e-02, -5.999e-02, 7.664e-02));
	r += mul(s6_5, M4(-2.217e-02, 1.164e-02, -1.264e-02, 4.551e-02, -6.797e-02, 1.238e-03, 3.343e-02, 9.795e-03, 1.598e-01, 3.646e-02, 4.413e-02, -1.461e-02, 2.523e-03, -5.439e-04, -4.791e-02, 1.033e-03));
	r += mul(s6_6, M4(1.176e-02, 1.414e-02, -3.136e-03, -2.034e-03, 2.634e-02, 4.214e-02, 2.754e-02, 3.087e-02, -3.876e-02, -4.940e-02, -3.528e-02, -6.377e-03, -1.154e-02, -3.120e-02, 8.880e-02, 2.014e-02));
	r += mul(s6_7, M4(4.944e-02, 3.875e-03, 2.216e-02, -5.348e-02, 4.101e-02, 7.348e-02, -7.037e-02, 5.307e-02, -1.714e-01, 1.506e-02, -4.873e-02, -5.804e-02, -2.607e-02, 1.517e-02, 5.446e-02, -2.876e-02));
	r += mul(s6_8, M4(-7.465e-02, 7.315e-04, 9.037e-03, 1.764e-02, 3.369e-02, 5.548e-04, 5.357e-02, 2.939e-02, 4.354e-02, -1.601e-02, -3.103e-03, -7.247e-02, 5.881e-02, -7.223e-03, 2.963e-02, -1.031e-01));
	r += mul(s7_0, M4(-5.706e-02, -5.552e-03, -1.280e-01, -2.101e-02, -1.691e-02, -3.833e-02, 4.682e-02, -3.053e-03, -2.772e-02, -1.184e-01, 5.773e-02, -3.441e-02, 4.748e-02, -2.675e-02, -1.698e-01, 5.623e-02));
	r += mul(s7_1, M4(-1.115e-01, -5.093e-02, -2.015e-01, -1.037e-01, 7.328e-02, 4.829e-02, 6.089e-02, 5.624e-02, -1.812e-01, -6.170e-03, -6.281e-02, -6.716e-02, -8.051e-02, 3.991e-02, 7.369e-02, -7.423e-02));
	r += mul(s7_2, M4(8.941e-02, -1.362e-01, 4.570e-02, 5.541e-02, -1.336e-01, 3.946e-02, -1.285e-01, -6.767e-02, -3.703e-02, 2.243e-02, 1.189e-02, 2.213e-02, 7.284e-03, 1.078e-02, -3.074e-02, 6.708e-02));
	r += mul(s7_3, M4(-9.038e-02, -5.443e-02, -2.140e-01, 2.917e-02, -8.495e-02, 1.500e-01, -1.034e-01, -6.819e-03, 1.175e-01, -2.877e-02, 3.825e-02, 1.543e-02, -6.762e-02, 6.191e-02, -3.292e-02, 1.214e-01));
	r += mul(s7_4, M4(1.351e-01, 1.929e-01, -4.921e-01, 5.014e-02, -2.351e-01, -3.016e-01, -1.121e-01, 3.097e-01, -1.658e-01, -1.272e-01, 7.207e-02, -3.743e-01, 3.631e-01, -1.412e-01, -1.938e-01, -8.993e-02));
	r += mul(s7_5, M4(-4.301e-02, 7.487e-02, 2.500e-02, 1.069e-01, -1.016e-01, 4.391e-02, 1.315e-01, 5.845e-02, 4.127e-02, 1.446e-02, -1.786e-02, -8.449e-02, -4.528e-02, -1.804e-02, -2.909e-02, -8.959e-02));
	r += mul(s7_6, M4(3.403e-02, 5.445e-02, -8.672e-03, -5.562e-02, 1.259e-01, 7.118e-02, -1.102e-03, 6.377e-03, -6.440e-03, -3.774e-02, -1.121e-02, -4.780e-02, 2.907e-03, -6.161e-02, 4.809e-02, 6.793e-02));
	r += mul(s7_7, M4(3.418e-02, -1.906e-02, -4.124e-02, -6.526e-02, -1.621e-01, -3.490e-02, -4.261e-02, -1.441e-01, -1.343e-02, 3.143e-02, 8.302e-03, 1.160e-01, -4.174e-02, 7.590e-03, 1.030e-01, -2.129e-01));
	r += mul(s7_8, M4(6.975e-03, 1.297e-02, 3.317e-02, -8.614e-02, -1.193e-01, 6.797e-02, -2.014e-02, -6.614e-02, 9.511e-02, 1.817e-02, 6.716e-03, 2.764e-02, -4.762e-03, -4.361e-03, 1.822e-02, -7.534e-02));
	// Constant offset added once, after all 72 matrix products.
	r += V4(-7.831e-03, -2.140e-04, -1.201e-04, 7.332e-03);
	return r;
}

// f2: builds one 4-component result from 72 4-component inputs.
//
// Each input sG_K (G = 0..7, K = 0..8, all V4 = min16float4) is multiplied by
// its own constant 4x4 matrix (M4 = min16float4x4) with mul(vector, matrix),
// which treats the vector as a row vector. The 72 products are summed into r
// in the order written, then a constant 4-component offset is added.
//
// NOTE(review): in Pass9 below, K = 0..8 enumerates the sample offsets
// (-1,-1), (0,-1), (1,-1), (-1,0), (0,0), (1,0), (-1,1), (0,1), (1,1); even G
// hold max(x, 0) and odd G hold -max(-x, 0) of the same samples. The call to
// f2 is not visible in this part of the file -- presumably it receives the same
// argument list as the f0/f1 calls; confirm.
// NOTE(review): the literals are presumably generated (trained) weights; they
// should not be edited or reordered by hand -- confirm against the generator.
//
// Returns: the accumulated V4.
V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) {
	// Accumulator for the weighted sum; starts at zero in all four components.
	V4 r = 0.0;
	// One row-vector * 4x4-matrix product per input, in parameter order.
	r += mul(s0_0, M4(-6.422e-02, 1.680e-02, -4.666e-02, -4.495e-02, -2.339e-03, 8.326e-03, -1.462e-02, -4.952e-02, 5.134e-02, -1.934e-02, 4.511e-02, 1.334e-01, -4.618e-03, -8.103e-03, 3.456e-02, 1.580e-02));
	r += mul(s0_1, M4(-1.842e-03, -3.105e-02, -2.950e-03, 1.885e-02, -3.407e-02, -3.604e-03, -2.389e-03, -5.071e-02, -8.016e-02, -3.803e-02, -8.330e-02, 1.628e-01, 1.962e-02, 3.725e-02, 2.377e-02, 3.499e-02));
	r += mul(s0_2, M4(-7.064e-03, 6.164e-03, -4.705e-02, -8.524e-03, -5.537e-03, 3.894e-02, -2.029e-02, -2.486e-02, -9.182e-03, 1.519e-01, -1.817e-02, 6.079e-02, -9.486e-03, -2.110e-02, 3.287e-02, -2.019e-03));
	r += mul(s0_3, M4(-1.171e-01, -5.278e-02, 7.026e-02, 4.255e-02, 1.090e-02, -5.941e-02, -3.697e-02, -3.014e-02, -1.685e-01, 2.513e-02, -4.012e-02, 8.469e-03, 1.878e-02, -1.719e-02, -8.298e-03, 1.018e-01));
	r += mul(s0_4, M4(-6.699e-02, 5.956e-04, -9.047e-02, -1.346e-01, 2.411e-02, -4.314e-02, 3.185e-02, -9.635e-02, 2.837e-01, -3.088e-02, -2.452e-02, -1.238e-01, -9.648e-02, 4.971e-02, -4.276e-02, -8.128e-02));
	r += mul(s0_5, M4(3.128e-02, -6.913e-02, -1.041e-02, -7.746e-02, -4.009e-02, -8.374e-02, 4.016e-02, 8.426e-03, 4.522e-03, 1.049e-01, 9.970e-02, -3.086e-01, -6.120e-03, -1.388e-01, -1.063e-01, -5.434e-02));
	r += mul(s0_6, M4(-3.365e-02, -3.072e-02, -9.321e-02, -1.832e-02, -5.298e-03, 7.121e-04, 2.169e-03, 4.990e-02, -8.102e-02, 4.921e-02, -5.902e-02, -2.201e-02, 1.276e-02, 3.207e-02, 3.180e-02, -1.999e-02));
	r += mul(s0_7, M4(-9.262e-02, -1.065e-01, -6.215e-02, -9.939e-02, 7.738e-02, -1.434e-02, 9.368e-03, 8.076e-02, 8.899e-03, -1.052e-01, -1.082e-01, 4.778e-02, -1.707e-01, -5.856e-02, -6.962e-02, -1.961e-02));
	r += mul(s0_8, M4(3.140e-02, 1.377e-02, 1.593e-04, -2.125e-02, 9.562e-03, 3.623e-02, -4.402e-02, -2.442e-02, -8.968e-02, 1.988e-01, 1.410e-02, 5.882e-02, 7.932e-02, 9.126e-02, 6.180e-02, -4.356e-02));
	r += mul(s1_0, M4(-1.858e-02, -9.682e-04, 1.851e-02, -2.569e-02, -1.704e-02, -5.016e-03, 4.800e-02, -9.740e-02, -5.270e-03, -6.199e-03, -2.214e-02, -2.733e-02, 3.645e-03, 5.273e-03, -2.155e-02, 1.597e-02));
	r += mul(s1_1, M4(2.906e-02, 6.890e-03, -2.221e-02, 5.509e-02, -1.067e-01, 3.076e-01, -1.190e-01, -3.612e-04, -7.062e-03, -2.028e-02, 5.407e-03, 3.502e-02, 4.203e-02, -5.999e-03, 6.040e-02, -9.000e-03));
	r += mul(s1_2, M4(-1.598e-02, -2.484e-02, -1.963e-02, 9.807e-03, 2.597e-02, -8.287e-02, 1.311e-02, -6.579e-02, 1.673e-02, 2.149e-02, 5.862e-03, -2.455e-03, -7.638e-04, 9.753e-03, -1.168e-02, 3.686e-03));
	r += mul(s1_3, M4(-2.996e-02, 1.216e-02, 2.214e-02, 1.757e-02, -1.134e-02, 5.456e-02, -3.628e-02, 4.101e-02, -1.514e-02, -2.445e-02, 6.486e-03, -4.812e-02, 4.947e-02, 1.221e-02, 1.329e-02, 4.396e-02));
	r += mul(s1_4, M4(1.473e-02, 1.606e-01, -5.135e-02, -3.255e-02, -1.753e-01, -5.682e-01, 1.286e-01, -4.066e-02, 8.350e-02, 4.364e-02, 9.298e-03, 1.028e-01, -3.886e-02, 3.602e-02, -2.097e-01, 2.511e-03));
	r += mul(s1_5, M4(3.637e-02, -9.506e-03, 6.912e-02, -1.290e-02, -2.436e-03, 1.188e-01, 5.628e-02, 1.431e-02, -2.094e-02, -1.958e-02, -3.403e-02, 4.689e-02, -7.153e-03, -3.282e-02, -3.716e-02, 1.626e-01));
	r += mul(s1_6, M4(-2.461e-02, 5.353e-02, -3.433e-02, 3.605e-02, -6.121e-02, -3.178e-03, 5.595e-02, 1.794e-02, -6.023e-02, -3.588e-03, -3.785e-02, 3.699e-02, 1.142e-01, 2.821e-02, -5.308e-02, 6.403e-02));
	r += mul(s1_7, M4(5.947e-02, -7.719e-02, 4.145e-02, -7.409e-03, -1.055e-01, 1.610e-01, -4.151e-02, 1.995e-02, 6.130e-02, -2.186e-02, -1.589e-02, 1.102e-01, 1.114e-01, -4.216e-02, 1.647e-03, -1.100e-01));
	r += mul(s1_8, M4(7.085e-03, 2.188e-02, -6.992e-03, -2.332e-03, 1.478e-02, -6.092e-02, -4.126e-02, 3.169e-04, -1.958e-02, -4.985e-03, -1.923e-03, -3.218e-02, 1.927e-02, -8.432e-03, -4.363e-02, 1.668e-03));
	r += mul(s2_0, M4(-6.546e-03, 1.343e-02, 2.296e-02, 5.659e-02, 2.752e-02, -4.231e-02, 6.074e-02, -5.683e-03, -3.253e-02, -5.294e-02, 1.101e-02, 9.926e-03, -1.273e-02, 3.820e-04, 4.827e-02, -4.940e-04));
	r += mul(s2_1, M4(4.736e-02, 2.476e-02, -9.261e-03, 7.812e-02, -1.871e-02, 1.027e-02, -4.256e-02, 1.155e-02, 2.293e-02, 2.758e-02, -1.940e-02, -1.913e-02, 1.163e-02, -1.837e-02, -1.667e-02, 1.094e-02));
	r += mul(s2_2, M4(-1.668e-02, -5.029e-02, -1.440e-02, 4.319e-02, 1.579e-02, -1.302e-02, 5.899e-02, 2.061e-02, -6.245e-03, -1.449e-02, 3.008e-02, -1.664e-02, 2.230e-02, 9.839e-03, 5.975e-02, 1.928e-02));
	r += mul(s2_3, M4(-1.839e-02, -2.045e-02, -4.858e-02, 3.903e-02, 7.104e-03, 1.747e-02, -5.856e-02, -1.094e-01, 1.239e-02, 2.181e-02, -1.091e-01, -8.099e-02, -4.212e-02, 3.092e-03, 4.736e-02, -7.150e-02));
	r += mul(s2_4, M4(-7.680e-03, 2.967e-02, -2.112e-02, -1.317e-02, -2.971e-01, -2.871e-01, 3.943e-01, -2.758e-01, 8.246e-02, 1.079e-01, 4.440e-03, 1.274e-01, 1.480e-02, 1.701e-03, -8.268e-02, 1.860e-02));
	r += mul(s2_5, M4(1.855e-02, 4.260e-02, -4.680e-02, -1.964e-02, 4.660e-02, 2.995e-01, -8.861e-02, 3.235e-02, 2.948e-02, 2.388e-03, 3.504e-03, -2.162e-03, 1.332e-02, 1.133e-02, 3.025e-02, 1.443e-02));
	r += mul(s2_6, M4(7.442e-03, 8.911e-03, -6.702e-03, -4.585e-02, 1.291e-01, -4.421e-02, 1.339e-01, 3.993e-02, -1.566e-02, 2.829e-02, 3.262e-02, 1.285e-02, -7.299e-03, 2.724e-02, -3.766e-02, 1.047e-02));
	r += mul(s2_7, M4(-3.381e-02, -1.174e-02, -4.296e-02, 1.705e-03, -4.339e-03, 2.267e-01, 7.326e-02, -2.796e-03, 6.150e-02, 3.889e-02, -6.071e-04, 9.139e-02, -3.776e-02, 2.394e-02, -5.208e-02, 5.949e-02));
	r += mul(s2_8, M4(-3.450e-02, -3.383e-02, 7.836e-03, 1.100e-02, 1.184e-02, -1.977e-01, -2.924e-02, -4.890e-02, -3.583e-03, -4.512e-02, -1.497e-02, 4.827e-02, 2.246e-03, 2.053e-02, 2.313e-02, -4.874e-02));
	r += mul(s3_0, M4(5.694e-02, -5.528e-02, 5.212e-02, 2.986e-02, 1.836e-02, 3.805e-02, -2.986e-02, 5.002e-02, -2.912e-02, -2.534e-02, -3.611e-02, -2.406e-02, 6.479e-03, -5.702e-03, -1.243e-01, 3.241e-02));
	r += mul(s3_1, M4(2.800e-02, -1.076e-01, 4.683e-02, 2.143e-02, 1.158e-02, -3.525e-02, 1.007e-01, -1.794e-03, -9.832e-03, -2.611e-02, 5.176e-03, -5.500e-02, -3.821e-02, 1.060e-02, 4.090e-02, -4.569e-02));
	r += mul(s3_2, M4(1.089e-02, -1.146e-01, -1.153e-02, -5.991e-02, 1.177e-02, 3.417e-03, -2.044e-02, -1.345e-03, 1.305e-03, 1.239e-02, 1.726e-02, -3.599e-02, 1.674e-02, -5.590e-02, -8.140e-02, -4.265e-02));
	r += mul(s3_3, M4(2.527e-02, 8.669e-02, 4.049e-02, -9.185e-03, -2.413e-02, -1.824e-03, 1.046e-01, 2.065e-02, -3.119e-02, -7.965e-02, -4.623e-02, 2.911e-02, 7.088e-02, -1.067e-01, 1.576e-01, 3.075e-02));
	r += mul(s3_4, M4(-1.465e-01, 1.556e-01, 6.490e-02, 1.404e-01, -1.715e-02, 1.477e-01, 1.859e-02, 1.233e-01, 2.244e-02, 8.846e-02, 1.742e-02, 6.853e-02, 8.496e-02, 2.641e-01, -2.901e-01, 1.538e-01));
	r += mul(s3_5, M4(5.795e-02, -5.553e-02, -1.528e-01, 2.953e-02, 4.175e-03, 3.038e-02, -7.615e-02, -2.932e-02, -3.998e-03, 3.270e-04, -5.368e-02, 4.379e-03, -3.845e-02, -8.842e-02, 1.667e-01, -1.082e-01));
	r += mul(s3_6, M4(-4.638e-02, 7.122e-02, 4.453e-03, -1.115e-01, 4.361e-02, 8.474e-03, 1.944e-02, 2.350e-02, -1.112e-02, -5.496e-02, 6.062e-03, -4.375e-03, -4.352e-02, 7.870e-02, -1.025e-01, -1.199e-01));
	r += mul(s3_7, M4(-9.071e-02, -5.780e-02, 3.592e-02, 3.266e-02, -7.675e-02, 7.431e-02, -9.768e-02, 2.174e-01, -9.449e-02, 2.945e-02, -7.159e-02, -6.166e-02, 9.235e-02, -1.781e-01, -5.495e-02, 7.006e-02));
	r += mul(s3_8, M4(-1.563e-02, 6.241e-02, 3.845e-02, -4.126e-02, 5.171e-02, -7.622e-03, -1.806e-03, 1.221e-01, 1.445e-02, -3.859e-02, -2.371e-02, 2.360e-02, -3.919e-02, 5.525e-02, 7.753e-04, -4.543e-02));
	r += mul(s4_0, M4(-7.578e-03, 2.589e-02, -3.446e-02, 9.255e-04, 1.508e-02, -4.230e-02, -3.854e-02, -8.312e-02, 5.879e-02, 2.448e-02, 3.286e-02, -8.081e-02, 2.705e-02, 5.509e-02, 1.780e-03, 9.692e-03));
	r += mul(s4_1, M4(7.212e-04, -9.108e-03, 8.121e-03, -4.260e-02, -3.512e-02, 5.898e-02, 9.525e-03, 4.439e-02, -1.030e-02, -1.913e-02, 5.020e-02, -1.238e-01, 9.665e-02, -1.807e-02, 5.449e-02, 2.058e-01));
	r += mul(s4_2, M4(1.662e-02, 3.488e-05, -4.044e-02, -5.045e-02, -2.282e-02, 7.772e-02, 5.240e-02, 1.573e-01, 3.551e-02, -1.906e-02, -4.839e-02, 3.603e-02, -5.828e-03, 1.748e-02, -1.287e-01, 1.122e-01));
	r += mul(s4_3, M4(-4.855e-02, -6.473e-02, -3.205e-02, -1.106e-02, 6.551e-02, 1.172e-01, 3.972e-02, -1.110e-02, 1.708e-01, -7.197e-02, 5.240e-02, 6.689e-02, 4.516e-02, -6.609e-02, 1.406e-02, 7.274e-03));
	r += mul(s4_4, M4(-4.430e-02, -1.395e-01, -4.970e-02, 1.998e-02, 1.560e-01, -1.112e-01, 6.712e-02, -4.218e-04, -2.212e-01, 5.050e-03, -1.333e-01, 2.678e-01, 2.361e-01, -1.204e-01, -2.388e-01, -1.541e-01));
	r += mul(s4_5, M4(1.204e-02, -1.997e-02, -6.763e-02, 1.468e-01, -6.895e-02, 7.981e-02, 3.946e-02, -1.001e-01, -5.877e-03, 1.462e-01, 1.895e-02, 9.893e-02, -7.228e-03, -3.419e-01, 5.974e-02, -3.917e-01));
	r += mul(s4_6, M4(7.028e-03, -1.167e-01, -1.396e-02, 6.679e-02, -1.213e-01, -1.553e-01, -6.473e-02, 5.061e-02, -4.635e-02, 6.373e-02, -2.930e-02, 2.868e-02, 6.047e-02, 3.747e-03, -5.183e-03, 2.880e-02));
	r += mul(s4_7, M4(9.784e-02, 2.841e-02, -6.079e-02, 4.337e-02, 4.047e-02, 1.088e-01, 2.636e-02, 7.993e-02, 1.425e-01, -2.001e-02, 2.533e-02, -1.085e-02, 1.535e-02, 8.569e-02, 1.048e-01, 4.093e-02));
	r += mul(s4_8, M4(-2.997e-02, -2.626e-02, 5.060e-02, 2.708e-02, 1.564e-02, -4.655e-03, 1.835e-02, -4.612e-03, 1.653e-02, -6.783e-03, -4.532e-05, -8.021e-02, -1.153e-01, 1.970e-01, -1.619e-01, 1.018e-01));
	r += mul(s5_0, M4(-7.963e-02, 8.415e-02, -8.603e-02, -6.643e-02, 5.413e-02, 1.145e-02, 4.283e-03, 1.071e-02, 1.425e-02, 9.550e-03, -5.742e-03, 2.553e-03, 8.458e-03, -1.834e-03, 2.582e-02, 1.234e-02));
	r += mul(s5_1, M4(4.813e-02, -3.183e-02, -8.014e-02, -1.980e-02, -4.461e-02, 3.795e-02, 1.069e-01, -3.706e-02, -1.612e-03, 2.115e-02, 4.532e-02, 1.812e-02, 4.970e-03, -7.523e-03, 6.517e-03, 4.676e-02));
	r += mul(s5_2, M4(-3.456e-03, 8.783e-03, -3.684e-03, -3.165e-02, 5.899e-03, 4.142e-02, 4.836e-02, 8.154e-02, 2.633e-02, 5.588e-02, -6.064e-02, 5.039e-02, -1.274e-02, 5.913e-04, -7.067e-02, -3.322e-02));
	r += mul(s5_3, M4(9.003e-02, -8.612e-02, -1.561e-01, 3.709e-02, 5.988e-02, 8.174e-02, -2.205e-02, -2.136e-02, 2.638e-02, -1.036e-01, 6.140e-02, 3.036e-02, -4.891e-02, -2.581e-02, -4.581e-02, -4.246e-02));
	r += mul(s5_4, M4(9.493e-03, -3.479e-01, -7.998e-02, -9.455e-02, 3.910e-02, -1.593e-02, 4.751e-02, 6.188e-02, 9.840e-02, 1.341e-01, -5.374e-02, -2.259e-02, 1.011e-01, 2.673e-02, -9.297e-03, -1.089e-01));
	r += mul(s5_5, M4(-3.690e-02, -3.564e-03, -7.306e-02, 6.273e-03, 5.888e-03, 1.425e-02, 1.465e-02, -4.929e-02, -1.569e-02, -5.070e-03, 6.049e-02, 8.172e-02, 7.841e-03, -3.845e-03, 9.943e-02, 2.239e-02));
	r += mul(s5_6, M4(-2.568e-01, 7.435e-02, -2.228e-02, 7.944e-02, -5.772e-02, -6.852e-02, -2.834e-02, 8.123e-02, 1.470e-01, 1.859e-02, -2.704e-02, 9.363e-03, 3.166e-02, 2.803e-02, -1.214e-03, 1.293e-02));
	r += mul(s5_7, M4(-6.304e-02, -2.063e-01, -7.649e-02, -1.351e-01, 5.625e-02, 3.581e-02, 2.647e-02, 9.335e-03, 1.043e-01, 5.061e-03, -5.448e-03, -1.123e-01, 2.964e-02, -9.273e-03, 4.073e-02, 8.731e-03));
	r += mul(s5_8, M4(-6.478e-02, -6.498e-02, -5.031e-02, 2.011e-01, 6.402e-03, 4.651e-02, 1.568e-02, 8.174e-02, -4.208e-02, -2.012e-02, 7.375e-03, -2.978e-02, 3.769e-02, -5.344e-03, -1.052e-02, 4.279e-02));
	r += mul(s6_0, M4(-5.605e-03, -1.344e-02, 3.815e-02, 6.425e-02, -4.361e-04, -5.982e-03, -2.016e-03, -6.674e-03, 1.816e-03, -6.747e-03, 9.217e-03, 7.074e-02, 1.043e-02, -9.410e-03, -2.738e-02, 2.603e-02));
	r += mul(s6_1, M4(-3.818e-02, -4.530e-02, 1.982e-03, 1.438e-02, 2.250e-02, -7.125e-03, -7.718e-05, -1.763e-02, -1.533e-02, 3.678e-02, -2.880e-02, 6.018e-02, 3.396e-02, -3.313e-02, 3.760e-03, 5.717e-02));
	r += mul(s6_2, M4(1.833e-02, 4.613e-02, -8.530e-03, -2.255e-02, -7.005e-03, -1.823e-02, 6.326e-04, -1.960e-02, 1.857e-02, 4.009e-03, 1.863e-02, 1.340e-02, 7.106e-03, 1.505e-02, -3.722e-02, 9.284e-03));
	r += mul(s6_3, M4(-1.978e-02, -2.542e-02, -5.701e-02, -3.668e-02, -3.079e-02, 1.690e-02, -4.785e-04, -4.757e-02, 2.229e-02, 3.626e-02, 7.499e-02, 7.492e-02, -8.919e-02, -1.301e-02, 3.316e-02, -2.238e-02));
	r += mul(s6_4, M4(5.750e-02, 1.236e-01, -3.727e-02, -9.971e-02, 5.715e-02, 3.370e-02, -5.223e-03, 6.820e-02, -3.931e-02, -2.162e-02, 4.768e-02, -3.176e-01, 1.332e-01, 8.223e-03, -6.747e-02, 1.525e-02));
	r += mul(s6_5, M4(-5.534e-02, 2.196e-02, -3.586e-02, 4.086e-02, -3.519e-02, 5.550e-02, 7.686e-03, 1.698e-02, 4.426e-02, 4.819e-02, -3.056e-02, 4.639e-03, 8.257e-03, -5.203e-03, 4.029e-02, 1.317e-02));
	r += mul(s6_6, M4(-4.034e-02, 5.021e-02, -5.809e-03, 2.783e-02, -2.373e-02, -3.029e-02, -5.745e-02, -1.953e-02, 7.143e-02, 7.924e-02, 9.374e-03, -6.179e-03, -3.599e-02, 3.946e-02, 3.311e-02, 1.562e-02));
	r += mul(s6_7, M4(4.839e-02, -6.011e-02, -1.972e-02, 6.475e-02, -1.888e-02, -4.052e-02, -5.563e-02, -6.110e-02, 7.359e-02, -2.875e-02, 4.410e-02, -6.216e-02, 3.299e-02, -2.920e-02, 1.754e-02, 4.605e-02));
	r += mul(s6_8, M4(4.073e-02, -1.848e-02, -1.961e-02, -3.710e-02, 4.170e-02, 1.913e-02, -2.446e-02, -1.048e-01, -2.286e-02, -1.650e-02, -8.772e-03, 7.160e-02, -2.420e-02, -3.058e-02, 3.382e-02, 3.350e-02));
	r += mul(s7_0, M4(-2.956e-02, -5.428e-02, 3.595e-02, 2.180e-02, -2.980e-02, -1.348e-02, -4.355e-02, 6.596e-02, 3.844e-02, 7.667e-03, 2.615e-02, 4.893e-02, -7.366e-02, 3.688e-02, -3.084e-02, -2.588e-02));
	r += mul(s7_1, M4(5.338e-02, 4.289e-02, 3.982e-02, 6.554e-02, 2.097e-03, -3.347e-02, 4.455e-02, 2.917e-02, -6.689e-03, -3.879e-02, 8.891e-02, -9.196e-02, 7.563e-03, 1.112e-01, -1.969e-01, 8.919e-02));
	r += mul(s7_2, M4(6.999e-02, 1.312e-03, 1.772e-01, 2.577e-03, -3.489e-02, -1.944e-02, -9.796e-02, -2.732e-02, 4.999e-03, -6.560e-03, -4.190e-02, 6.350e-02, -2.773e-02, -2.716e-02, 6.816e-03, -4.187e-02));
	r += mul(s7_3, M4(-2.480e-02, 1.246e-01, -2.110e-04, -5.843e-02, 5.100e-02, -5.059e-02, -5.935e-02, -1.641e-01, 5.114e-02, -6.457e-02, -3.647e-02, -4.605e-02, -2.080e-01, -9.407e-02, -4.015e-02, 8.764e-02));
	r += mul(s7_4, M4(-2.201e-01, 4.873e-01, -1.571e-01, -8.180e-02, 7.717e-02, 2.528e-01, 1.153e-02, -3.846e-02, -1.282e-01, 3.397e-01, 1.587e-01, -2.980e-01, -1.068e-02, 8.088e-02, 3.736e-01, -1.061e-01));
	r += mul(s7_5, M4(2.394e-02, -6.763e-02, -1.783e-01, 3.603e-02, 4.565e-03, -1.765e-01, 1.102e-01, -2.667e-01, 1.554e-02, -6.939e-02, 1.764e-03, -3.768e-02, 2.578e-02, 9.643e-02, 3.601e-02, -2.208e-02));
	r += mul(s7_6, M4(2.830e-02, 8.147e-03, 6.145e-03, -4.349e-02, -1.741e-01, 2.157e-02, -1.054e-01, -1.158e-01, 4.637e-02, -1.196e-02, 1.201e-01, -8.628e-03, -2.468e-01, 3.790e-02, 5.359e-02, 2.575e-02));
	r += mul(s7_7, M4(-8.000e-02, 3.732e-02, 1.501e-02, -4.495e-03, 1.110e-01, 5.499e-02, 3.363e-02, -9.936e-02, 8.269e-02, 4.787e-02, -5.760e-02, 4.804e-03, -6.810e-02, -6.330e-02, -5.539e-02, 4.120e-02));
	r += mul(s7_8, M4(3.943e-02, 1.279e-02, -8.367e-04, 4.311e-02, -1.619e-01, 4.199e-01, -2.100e-03, -3.792e-01, -4.745e-02, -2.734e-03, -4.497e-02, -2.701e-02, -2.135e-02, -2.782e-02, 1.058e-02, 2.532e-02));
	// Constant 4-component offset, added after all 72 products.
	r += V4(-7.390e-04, 1.816e-03, -2.728e-04, -1.866e-03);
	return r;
}

// f3: builds one 4-component result from 72 4-component inputs.
//
// Each input sG_K (G = 0..7, K = 0..8, all V4 = min16float4) is multiplied by
// its own constant 4x4 matrix (M4 = min16float4x4) with mul(vector, matrix),
// which treats the vector as a row vector. The 72 products are summed into r
// in the order written, then a constant 4-component offset is added.
//
// NOTE(review): in Pass9 below, K = 0..8 enumerates the sample offsets
// (-1,-1), (0,-1), (1,-1), (-1,0), (0,0), (1,0), (-1,1), (0,1), (1,1); even G
// hold max(x, 0) and odd G hold -max(-x, 0) of the same samples. The call to
// f3 is not visible in this part of the file -- presumably it receives the same
// argument list as the f0/f1 calls; confirm.
// NOTE(review): the literals are presumably generated (trained) weights; they
// should not be edited or reordered by hand -- confirm against the generator.
//
// Returns: the accumulated V4.
V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) {
	// Accumulator for the weighted sum; starts at zero in all four components.
	V4 r = 0.0;
	// One row-vector * 4x4-matrix product per input, in parameter order.
	r += mul(s0_0, M4(1.895e-02, -3.486e-03, -1.379e-02, -1.842e-02, -3.970e-02, 3.895e-02, -3.372e-03, -4.028e-02, -8.637e-02, 1.274e-01, -1.950e-02, -1.323e-01, 2.921e-02, -8.863e-03, -3.657e-02, -3.447e-02));
	r += mul(s0_1, M4(-1.112e-02, -1.559e-02, 1.731e-02, -9.551e-02, 4.760e-02, -4.934e-02, -2.834e-02, 2.646e-02, -7.670e-02, -6.197e-02, -1.755e-01, -2.571e-03, -8.377e-03, 2.422e-02, 3.033e-02, 1.416e-02));
	r += mul(s0_2, M4(-2.654e-03, -9.362e-02, 3.156e-02, -3.364e-02, 5.052e-03, 7.076e-02, -7.983e-03, 2.995e-03, 2.022e-02, 3.845e-02, 1.639e-02, 2.667e-02, 2.051e-02, -5.142e-02, 4.048e-03, 9.972e-03));
	r += mul(s0_3, M4(7.535e-02, -3.695e-02, -1.487e-03, 1.763e-02, 2.683e-02, 2.572e-02, -3.221e-03, 4.724e-02, 1.230e-01, -1.008e-02, -3.410e-01, -1.780e-02, -5.457e-02, 7.070e-02, 5.849e-02, 3.655e-02));
	r += mul(s0_4, M4(-1.365e-02, 6.174e-02, 3.209e-02, 1.707e-02, 2.051e-01, 3.121e-02, 1.777e-02, -5.450e-02, 8.808e-02, 2.535e-02, -3.146e-01, -9.044e-02, 4.733e-02, -1.513e-01, -4.178e-02, 1.460e-01));
	r += mul(s0_5, M4(5.314e-03, -9.108e-02, -1.646e-02, -6.978e-02, 4.098e-02, 4.081e-02, 3.569e-02, 3.792e-02, -5.984e-02, -5.551e-02, -2.214e-01, -4.605e-02, -6.742e-02, -4.717e-03, 3.778e-02, -4.409e-02));
	r += mul(s0_6, M4(-5.846e-02, 3.342e-02, 5.078e-02, -3.740e-02, 5.761e-02, -2.075e-02, 1.671e-02, -3.571e-02, -5.007e-02, -7.200e-02, 3.912e-02, -3.255e-02, 4.228e-02, -2.367e-02, -2.202e-02, 7.920e-03));
	r += mul(s0_7, M4(6.797e-02, -1.165e-01, 5.155e-02, -3.932e-02, 1.142e-02, 3.870e-02, -4.763e-04, -1.028e-02, -1.720e-01, 7.857e-02, -1.793e-02, -1.800e-02, 5.313e-02, -9.309e-02, 3.770e-02, -1.870e-02));
	r += mul(s0_8, M4(-1.552e-02, -1.472e-02, -4.440e-02, -1.742e-02, -9.677e-03, -2.365e-03, 8.017e-03, -3.447e-03, -4.267e-02, 9.093e-02, -5.832e-03, 1.950e-02, -4.456e-03, -1.277e-01, -9.661e-02, -3.767e-02));
	r += mul(s1_0, M4(1.220e-02, 2.545e-02, -2.427e-02, -2.919e-02, 1.376e-01, -4.778e-02, -5.280e-02, 3.424e-02, -1.951e-02, 5.586e-02, -2.064e-02, -1.663e-02, 5.072e-03, -1.511e-02, 2.885e-02, 3.522e-02));
	r += mul(s1_1, M4(-4.135e-03, -4.255e-02, 1.657e-02, 1.401e-02, 2.459e-01, -1.500e-01, -1.057e-01, 8.905e-02, 2.055e-02, -3.477e-02, -4.044e-02, -3.994e-02, 8.582e-03, 8.109e-02, -2.204e-03, -1.661e-02));
	r += mul(s1_2, M4(-5.406e-04, 1.480e-02, 2.395e-02, -2.375e-02, -4.139e-02, 7.214e-02, -6.710e-02, 1.183e-02, 6.631e-03, 4.481e-02, -2.836e-02, 1.352e-02, 1.651e-02, 1.306e-02, -4.984e-02, 8.676e-03));
	r += mul(s1_3, M4(-5.175e-03, 4.428e-02, -5.746e-02, 4.562e-02, 1.205e-02, -7.419e-02, 1.158e-02, 6.830e-02, 1.129e-02, -2.328e-02, 2.257e-02, 2.547e-02, 9.760e-03, 5.335e-02, -6.050e-02, 1.134e-01));
	r += mul(s1_4, M4(6.943e-04, 6.939e-02, 4.084e-03, -4.701e-02, 1.362e-01, -3.526e-01, -3.718e-02, 5.053e-02, -6.910e-02, 5.202e-02, -5.544e-02, -5.834e-02, -1.604e-02, 8.243e-02, 1.663e-01, -3.396e-02));
	r += mul(s1_5, M4(3.533e-03, -4.729e-02, -3.179e-02, 3.769e-02, 4.131e-02, -1.719e-01, -1.389e-02, 2.174e-02, 4.603e-03, -2.876e-02, 6.189e-02, 4.800e-02, 3.724e-02, 2.389e-02, -5.403e-02, 1.807e-02));
	r += mul(s1_6, M4(-1.131e-01, 8.189e-02, -1.971e-02, 1.542e-02, 9.368e-02, 6.963e-02, 3.110e-02, 1.709e-02, -1.058e-02, -2.140e-03, 2.814e-02, 1.187e-02, 8.828e-02, -1.966e-03, -1.097e-01, -3.622e-02));
	r += mul(s1_7, M4(8.828e-02, -1.550e-01, 2.141e-03, -2.533e-03, 2.275e-03, -9.305e-02, 2.913e-03, -2.725e-03, -3.023e-02, 7.153e-02, -2.468e-02, -1.383e-02, 9.642e-02, 1.112e-01, -2.530e-01, 5.328e-02));
	r += mul(s1_8, M4(-4.595e-02, 8.567e-02, -1.921e-02, 1.341e-02, -4.145e-03, -6.961e-02, -2.132e-03, -1.025e-02, 1.577e-02, -2.728e-02, 8.589e-03, -1.513e-02, -2.036e-03, 3.103e-02, 2.803e-02, -9.909e-03));
	r += mul(s2_0, M4(1.872e-02, -3.554e-02, -4.315e-02, 9.349e-03, 1.951e-02, 2.302e-02, -2.166e-02, 1.058e-02, 5.752e-02, -2.872e-02, 1.595e-03, -2.577e-02, 4.223e-02, 3.590e-02, -2.997e-02, -9.733e-03));
	r += mul(s2_1, M4(-6.303e-02, 8.675e-02, -4.988e-03, -4.818e-02, 6.319e-02, -9.892e-02, 8.761e-02, 5.963e-02, -3.507e-02, -1.705e-02, 6.934e-02, 4.970e-02, 2.267e-03, -4.190e-02, -2.179e-03, -3.351e-02));
	r += mul(s2_2, M4(2.213e-02, -3.701e-02, 3.497e-02, -5.799e-02, -3.614e-02, -2.253e-02, -4.799e-02, -4.115e-02, 8.547e-03, 7.009e-02, -1.611e-03, 3.784e-02, 1.500e-03, 5.809e-02, -2.469e-02, 2.074e-02));
	r += mul(s2_3, M4(-2.082e-02, 5.921e-02, -5.766e-03, -6.030e-02, 7.578e-03, 1.904e-02, -1.047e-01, 1.059e-01, -7.170e-02, 5.991e-02, 4.122e-02, -4.622e-02, 3.416e-03, 1.202e-03, 1.250e-02, 2.651e-02));
	r += mul(s2_4, M4(-5.199e-02, -8.660e-03, -9.264e-02, 4.332e-02, -2.770e-01, -9.477e-02, -1.739e-01, 3.310e-02, -5.992e-02, -2.027e-01, 1.018e-01, -6.506e-02, -1.506e-01, -3.280e-02, 7.540e-02, -2.998e-02));
	r += mul(s2_5, M4(-4.084e-02, -1.771e-02, 1.499e-02, -2.934e-02, -1.790e-02, -1.188e-01, 1.644e-01, -5.213e-02, -9.989e-03, 8.711e-02, -5.337e-02, -8.002e-03, -3.434e-02, 2.052e-02, 2.082e-02, 2.510e-02));
	r += mul(s2_6, M4(1.092e-02, 2.723e-02, 7.463e-03, -1.953e-02, -9.929e-02, -3.255e-02, 7.282e-02, -8.302e-02, 4.877e-02, -2.527e-02, 2.649e-04, 2.636e-02, -3.057e-02, -2.064e-02, 3.504e-02, 1.511e-02));
	r += mul(s2_7, M4(-2.356e-02, 1.447e-02, 3.365e-02, -6.376e-03, 1.848e-02, -4.435e-02, -1.451e-01, -1.324e-01, -1.865e-02, 2.552e-02, -5.798e-02, 8.032e-03, -4.240e-02, 6.194e-02, -7.205e-02, -7.899e-03));
	r += mul(s2_8, M4(5.712e-02, -8.752e-03, 3.965e-02, -1.018e-02, 5.286e-03, -3.574e-02, 7.631e-02, -1.518e-02, -1.465e-02, 1.768e-02, 2.405e-02, 8.130e-03, 2.138e-04, -6.461e-03, -2.955e-02, 3.826e-03));
	r += mul(s3_0, M4(-4.841e-02, 2.032e-01, -3.635e-02, -2.115e-01, 1.990e-02, 4.568e-03, 2.516e-03, -1.000e-02, 5.002e-02, -4.300e-02, 9.021e-03, -5.821e-02, -7.489e-02, 1.239e-02, 1.077e-01, 1.305e-01));
	r += mul(s3_1, M4(-4.201e-02, -1.989e-01, -3.334e-02, -6.472e-03, 1.388e-02, 2.082e-02, -4.926e-02, -1.762e-02, -5.040e-02, -2.869e-03, 2.892e-02, -4.308e-02, -2.206e-02, 1.446e-02, -7.367e-02, -6.763e-02));
	r += mul(s3_2, M4(2.386e-02, -9.420e-02, 1.363e-01, 1.211e-02, -6.687e-04, -3.608e-02, 7.732e-03, 1.757e-03, 2.526e-03, -1.860e-02, 4.211e-02, 2.851e-02, 4.414e-03, 6.763e-03, 8.533e-02, 3.429e-03));
	r += mul(s3_3, M4(-2.262e-01, 4.314e-02, -9.302e-02, 1.245e-01, 1.110e-01, 3.947e-02, -1.664e-02, 3.281e-02, -4.983e-02, 1.251e-02, 4.169e-02, -5.546e-02, 2.145e-01, -1.460e-01, 3.327e-01, -6.628e-02));
	r += mul(s3_4, M4(-2.728e-02, -4.408e-01, -9.411e-02, -2.079e-02, -1.833e-01, 6.247e-02, -7.999e-02, 7.305e-02, -7.140e-02, -5.381e-02, 2.619e-02, 4.561e-02, -1.321e-01, 1.948e-01, 5.573e-01, 6.632e-02));
	r += mul(s3_5, M4(-1.393e-01, -9.259e-02, -7.399e-02, -1.722e-01, -1.226e-02, -2.651e-02, 2.733e-02, -3.692e-02, -8.345e-03, 3.471e-02, 1.499e-02, -5.093e-02, 4.088e-03, 1.013e-02, 4.411e-02, 2.132e-02));
	r += mul(s3_6, M4(-1.362e-01, 1.633e-02, 3.048e-03, -2.313e-02, -1.697e-03, -5.715e-03, 2.150e-02, -1.245e-02, 6.827e-02, -7.725e-02, 5.958e-02, -2.006e-03, -8.098e-02, 1.432e-02, -1.368e-01, 2.472e-02));
	r += mul(s3_7, M4(-6.272e-02, -1.959e-01, 1.151e-01, -3.563e-02, -5.832e-02, -4.127e-02, -6.041e-04, 3.039e-02, 8.441e-03, 4.542e-02, -5.644e-02, -3.768e-03, -5.552e-03, -2.609e-02, 1.490e-01, -3.899e-02));
	r += mul(s3_8, M4(-2.868e-02, 1.260e-01, 2.419e-02, -7.906e-03, -1.357e-02, 6.577e-02, -3.260e-02, -1.060e-02, -9.049e-03, -8.496e-02, 1.345e-02, 1.437e-02, -4.895e-02, 5.249e-02, -3.312e-02, -1.726e-02));
	r += mul(s4_0, M4(1.845e-02, -1.428e-02, 3.025e-02, -3.601e-02, -7.456e-02, 4.700e-02, -5.271e-03, 8.968e-02, -3.187e-02, 8.715e-03, 1.572e-03, 7.963e-03, -2.486e-02, -3.948e-02, -4.365e-02, 1.295e-02));
	r += mul(s4_1, M4(5.060e-02, -3.672e-02, 4.173e-02, 5.038e-02, -1.104e-01, 4.492e-02, -6.302e-02, 2.167e-02, 1.372e-01, -3.682e-02, -3.740e-02, 2.062e-02, -6.461e-02, -5.470e-03, -8.412e-02, -1.235e-01));
	r += mul(s4_2, M4(-1.806e-02, 1.559e-02, -2.325e-02, -4.432e-02, 3.505e-02, -4.632e-03, 4.085e-02, 1.421e-01, -4.986e-03, 4.484e-03, 6.821e-03, 5.988e-03, 1.689e-01, -7.924e-02, 2.597e-02, -3.159e-02));
	r += mul(s4_3, M4(1.002e-01, -6.215e-02, -2.484e-02, -9.233e-02, 4.768e-02, -1.974e-01, -8.080e-03, 5.666e-03, -1.172e-02, 4.765e-03, -2.743e-03, -5.749e-02, -3.136e-02, -1.183e-02, 4.700e-02, 2.595e-02));
	r += mul(s4_4, M4(-3.767e-02, 2.888e-02, 1.105e-01, 1.856e-02, 9.276e-02, 7.851e-02, -5.774e-02, -1.489e-01, -7.548e-02, -7.316e-02, 6.093e-01, 3.221e-01, -5.959e-01, 9.822e-02, 8.992e-02, 1.061e-01));
	r += mul(s4_5, M4(-5.871e-03, 9.548e-03, -1.592e-02, 1.531e-02, 5.870e-02, -3.468e-03, 4.422e-02, 2.998e-02, 3.743e-03, -5.535e-02, 3.347e-02, 1.525e-02, 3.906e-02, 6.235e-02, -1.618e-02, 1.045e-01));
	r += mul(s4_6, M4(1.008e-03, 3.806e-03, 1.244e-02, -7.620e-02, 1.407e-02, 8.177e-03, 6.615e-02, -2.112e-02, -4.001e-02, -2.659e-02, 2.051e-03, -1.621e-02, 1.130e-01, -3.558e-02, -6.091e-02, -6.975e-03));
	r += mul(s4_7, M4(-1.033e-01, 5.164e-02, -8.706e-02, 5.892e-02, -2.319e-02, -6.319e-02, -1.004e-01, 6.903e-02, 3.794e-02, -1.284e-01, 5.790e-02, -3.345e-03, -1.194e-01, 1.352e-01, -8.862e-02, 9.251e-02));
	r += mul(s4_8, M4(6.850e-02, 2.214e-02, -2.984e-02, -6.584e-03, -6.177e-02, 1.027e-01, -1.062e-04, 1.554e-02, 2.624e-02, 2.446e-02, -2.709e-02, -6.687e-03, -8.656e-03, 6.004e-02, -8.694e-02, 2.832e-02));
	r += mul(s5_0, M4(6.199e-02, -1.131e-01, 6.170e-02, -7.534e-03, -4.255e-02, 4.538e-02, -3.063e-02, 4.212e-02, 2.331e-02, -1.634e-02, -6.050e-03, -4.349e-02, 2.814e-03, -1.989e-03, -1.116e-02, -2.933e-02));
	r += mul(s5_1, M4(-3.261e-02, 7.652e-02, 6.421e-02, -1.008e-01, 5.395e-02, 5.759e-02, -1.665e-01, 1.023e-01, 5.627e-02, -2.189e-02, 8.770e-03, 1.208e-01, -1.837e-02, -5.015e-02, 3.498e-02, 4.259e-02));
	r += mul(s5_2, M4(2.237e-02, 9.596e-03, -4.689e-02, 1.668e-02, -4.693e-02, 3.892e-02, -4.201e-02, 8.341e-03, 1.646e-02, 9.753e-02, 7.488e-04, -4.371e-02, -4.316e-02, 1.629e-02, 1.219e-02, -4.465e-02));
	r += mul(s5_3, M4(1.071e-01, 1.818e-01, 1.804e-01, -4.161e-01, -5.134e-02, -2.543e-03, 4.252e-02, -1.254e-01, 8.945e-02, -6.327e-02, -1.708e-02, -7.655e-02, -1.818e-02, 1.160e-02, 5.056e-02, -9.572e-03));
	r += mul(s5_4, M4(-7.047e-02, 5.558e-02, -2.566e-01, 2.293e-02, 1.344e-01, -5.759e-02, 1.336e-01, -1.365e-01, -1.018e-01, 1.131e-01, -2.599e-04, 5.203e-02, -4.006e-02, 5.433e-02, -1.179e-02, 4.684e-02));
	r += mul(s5_5, M4(9.100e-03, -5.963e-02, -4.189e-02, 2.374e-02, -1.049e-02, -1.697e-02, 3.455e-02, 7.721e-02, 4.418e-02, 9.174e-02, 1.238e-01, 6.190e-02, 7.101e-02, 7.415e-02, 8.181e-03, 5.949e-02));
	r += mul(s5_6, M4(-3.657e-02, 8.785e-02, -1.366e-01, 3.699e-02, -5.526e-02, 3.303e-02, -1.172e-02, 5.110e-02, 1.694e-02, -4.945e-02, -1.527e-02, -5.031e-02, 4.639e-03, -3.881e-03, -2.765e-02, -5.272e-03));
	r += mul(s5_7, M4(-1.075e-01, -3.386e-02, -4.008e-03, -2.277e-01, 2.691e-02, 5.804e-02, -7.960e-02, -1.331e-02, -1.555e-03, 2.882e-02, -2.749e-02, 1.986e-02, 4.661e-02, 5.051e-03, 2.789e-04, -1.044e-02));
	r += mul(s5_8, M4(3.584e-02, 5.666e-02, 2.708e-02, -7.064e-02, -1.714e-02, 3.086e-02, -6.665e-03, 1.561e-02, 3.868e-02, 1.260e-02, 3.594e-02, 1.405e-02, 4.834e-02, 1.432e-02, -1.207e-03, -3.702e-02));
	r += mul(s6_0, M4(-7.545e-03, 5.817e-03, -3.976e-03, -6.886e-02, 1.241e-03, 3.949e-02, -9.141e-03, -2.932e-02, 3.061e-02, -3.527e-03, 4.619e-02, -6.930e-03, 4.226e-03, -3.380e-02, -4.733e-03, -9.659e-02));
	r += mul(s6_1, M4(1.434e-02, -5.702e-02, 1.086e-02, 1.870e-02, -8.674e-04, -2.612e-03, -4.122e-02, -1.223e-02, 3.054e-02, 2.505e-02, 4.928e-02, -2.850e-02, -6.155e-02, -2.670e-02, -2.795e-02, -2.198e-02));
	r += mul(s6_2, M4(3.110e-02, 1.396e-02, 4.330e-02, -5.054e-03, -1.258e-03, 9.789e-04, 2.307e-03, 3.330e-03, 1.202e-02, -1.888e-03, -7.485e-03, -3.196e-02, 4.168e-03, 1.211e-02, 4.629e-02, -1.252e-02));
	r += mul(s6_3, M4(-5.995e-02, 2.188e-02, -2.808e-02, 1.556e-02, -1.819e-02, -5.827e-03, -2.431e-02, 4.258e-02, 6.412e-03, 2.012e-02, 1.066e-02, 8.169e-02, 4.143e-02, 2.179e-01, -3.265e-02, -8.631e-03));
	r += mul(s6_4, M4(1.035e-01, -1.406e-01, 1.473e-01, 7.944e-02, -3.233e-02, -1.322e-02, 1.975e-02, 4.355e-02, -6.917e-02, 6.086e-02, 1.008e-03, -5.061e-02, -1.441e-01, 6.546e-02, -2.602e-02, -1.753e-01));
	r += mul(s6_5, M4(8.260e-02, -4.606e-02, 1.443e-01, 9.276e-03, -6.593e-03, 3.684e-02, 3.764e-02, 1.085e-03, -1.626e-02, -5.051e-02, -1.110e-02, 7.292e-02, 3.659e-02, -2.852e-02, -3.503e-02, 2.781e-02));
	r += mul(s6_6, M4(-2.993e-02, 3.806e-02, 3.685e-03, -1.483e-02, -3.055e-02, -1.343e-02, 5.796e-02, -2.555e-02, -3.573e-03, 2.753e-02, -1.052e-01, 5.268e-03, -6.556e-02, 1.491e-02, 1.841e-02, 2.626e-02));
	r += mul(s6_7, M4(6.475e-02, -1.022e-01, -1.800e-02, -6.002e-03, 1.126e-01, -1.013e-01, 1.138e-01, -5.236e-02, 5.473e-02, -4.970e-02, 7.074e-02, -2.390e-02, -2.350e-02, -2.878e-02, 2.098e-02, 7.152e-02));
	r += mul(s6_8, M4(3.902e-02, -7.221e-03, 2.533e-02, -1.634e-02, -7.790e-02, 1.895e-02, 3.363e-02, 2.468e-02, 5.341e-02, 3.363e-02, -8.903e-03, 2.315e-03, 5.390e-03, 1.790e-02, -2.082e-02, -2.614e-04));
	r += mul(s7_0, M4(4.407e-02, -4.841e-02, 8.242e-02, 2.578e-02, -2.986e-02, -2.456e-02, -3.235e-02, 2.050e-02, 1.417e-02, 3.272e-02, -5.705e-03, -5.092e-02, 3.907e-02, -7.260e-02, -9.419e-02, 1.317e-02));
	r += mul(s7_1, M4(-6.845e-03, 2.661e-02, 7.769e-03, -6.435e-02, 7.383e-03, -1.629e-02, -4.591e-02, -7.700e-02, 6.616e-02, -9.670e-03, 1.328e-02, 7.153e-02, -4.024e-02, -2.898e-02, 4.793e-02, 7.884e-02));
	r += mul(s7_2, M4(-1.220e-02, -8.752e-02, -6.199e-02, 9.084e-03, 1.325e-02, -5.939e-02, 4.166e-02, 7.510e-03, -2.051e-02, -3.170e-02, 2.921e-02, -3.657e-02, -4.016e-02, 3.860e-02, -6.299e-02, -3.094e-02));
	r += mul(s7_3, M4(1.389e-01, 8.623e-05, 2.497e-03, 6.358e-02, 2.282e-02, 5.944e-03, 9.160e-02, 6.074e-02, -4.673e-02, 8.961e-02, 4.270e-02, 1.630e-01, 5.060e-01, 5.417e-03, 1.208e-01, 3.584e-02));
	r += mul(s7_4, M4(3.425e-02, 1.627e-01, 1.463e-01, -1.593e-01, 5.820e-02, -1.009e-01, -2.588e-01, 1.984e-01, -5.505e-02, -8.789e-02, -2.568e-02, -1.750e-01, 1.750e-01, -4.991e-01, -1.489e-01, -1.970e-02));
	r += mul(s7_5, M4(-9.526e-02, -5.354e-02, 4.962e-02, 1.750e-02, -8.023e-02, 2.113e-01, -1.938e-01, -2.143e-01, 1.331e-02, 1.890e-02, 8.502e-03, 1.317e-01, 3.780e-03, -1.992e-02, 1.165e-02, 5.067e-03));
	r += mul(s7_6, M4(8.740e-04, -1.707e-02, -5.092e-03, -2.246e-02, 3.230e-02, -2.677e-02, -4.519e-02, 1.569e-02, 2.273e-02, -7.644e-02, 5.968e-02, -5.865e-02, -7.115e-02, 2.750e-02, -2.825e-02, 5.514e-02));
	r += mul(s7_7, M4(1.093e-01, -4.552e-02, -2.246e-02, 5.209e-02, 1.481e-01, -4.852e-02, -7.095e-02, 1.554e-01, -5.972e-02, 4.680e-02, 4.096e-02, 7.233e-02, -2.237e-02, -1.442e-01, 4.287e-02, 4.532e-02));
	r += mul(s7_8, M4(1.076e-03, -9.680e-02, 6.785e-03, 3.552e-02, -9.446e-02, 1.963e-02, 1.414e-01, 1.773e-02, -2.142e-02, 3.047e-02, -3.103e-02, -2.439e-02, 1.599e-03, 8.774e-03, 1.240e-02, 2.460e-02));
	// Constant 4-component offset, added after all 72 products.
	r += V4(8.583e-03, -6.438e-03, 6.338e-03, 1.485e-04);
	return r;
}

// Pass 9 entry point: one thread handles one pixel `gxy`.
//
// For each of the four pass-local tap macros l0..l3 (defined above this
// function) it reads a 3x3 neighbourhood, splits every tap into a
// non-negative and a non-positive half, and feeds all 72 resulting vectors
// to f0..f3, whose results are stored to t0..t3 at `gxy`.
//
//   blockStart - offset added to the per-thread coordinate.
//   tid        - only tid.x is used; it is passed to Rmp8x8 to get the
//                per-thread coordinate (presumably an 8x8 tile remap of the
//                linear thread index - confirm against the Magpie runtime).
void Pass9(uint2 blockStart, uint3 tid) {
	// NOTE: the names `pt` and `pos` must not change; the O(t, p) macro that
	// l0..l3 expand to refers to them textually.
	float2 pt = float2(GetInputPt());
	uint2 gxy = Rmp8x8(tid.x) + blockStart;
	uint2 size = GetInputSize();
	// Threads outside the size reported by GetInputSize() do nothing.
	if (gxy.x >= size.x || gxy.y >= size.y) {
		return;
	}
	// Pixel centre scaled by `pt` (sampling position for the tap macros).
	float2 pos = (gxy + 0.5) * pt;

	// Source 0: taps _0.._8 run row by row from offset (-1,-1) to (1,1).
	V4 s0_0 = l0(-1.0, -1.0);
	V4 s0_1 = l0(0.0, -1.0);
	V4 s0_2 = l0(1.0, -1.0);
	V4 s0_3 = l0(-1.0, 0.0);
	V4 s0_4 = l0(0.0, 0.0);
	V4 s0_5 = l0(1.0, 0.0);
	V4 s0_6 = l0(-1.0, 1.0);
	V4 s0_7 = l0(0.0, 1.0);
	V4 s0_8 = l0(1.0, 1.0);
	// s1_* = -max(-v, 0): the non-positive part of each tap (zero where v > 0).
	V4 s1_0 = -max(-s0_0, 0.0);
	V4 s1_1 = -max(-s0_1, 0.0);
	V4 s1_2 = -max(-s0_2, 0.0);
	V4 s1_3 = -max(-s0_3, 0.0);
	V4 s1_4 = -max(-s0_4, 0.0);
	V4 s1_5 = -max(-s0_5, 0.0);
	V4 s1_6 = -max(-s0_6, 0.0);
	V4 s1_7 = -max(-s0_7, 0.0);
	V4 s1_8 = -max(-s0_8, 0.0);
	// s0_* is then overwritten with the non-negative part (zero where v < 0).
	s0_0 = max(s0_0, 0.0);
	s0_1 = max(s0_1, 0.0);
	s0_2 = max(s0_2, 0.0);
	s0_3 = max(s0_3, 0.0);
	s0_4 = max(s0_4, 0.0);
	s0_5 = max(s0_5, 0.0);
	s0_6 = max(s0_6, 0.0);
	s0_7 = max(s0_7, 0.0);
	s0_8 = max(s0_8, 0.0);

	// Source 1: same scheme, s2_* = non-negative part, s3_* = non-positive part.
	V4 s2_0 = l1(-1.0, -1.0);
	V4 s2_1 = l1(0.0, -1.0);
	V4 s2_2 = l1(1.0, -1.0);
	V4 s2_3 = l1(-1.0, 0.0);
	V4 s2_4 = l1(0.0, 0.0);
	V4 s2_5 = l1(1.0, 0.0);
	V4 s2_6 = l1(-1.0, 1.0);
	V4 s2_7 = l1(0.0, 1.0);
	V4 s2_8 = l1(1.0, 1.0);
	V4 s3_0 = -max(-s2_0, 0.0);
	V4 s3_1 = -max(-s2_1, 0.0);
	V4 s3_2 = -max(-s2_2, 0.0);
	V4 s3_3 = -max(-s2_3, 0.0);
	V4 s3_4 = -max(-s2_4, 0.0);
	V4 s3_5 = -max(-s2_5, 0.0);
	V4 s3_6 = -max(-s2_6, 0.0);
	V4 s3_7 = -max(-s2_7, 0.0);
	V4 s3_8 = -max(-s2_8, 0.0);
	s2_0 = max(s2_0, 0.0);
	s2_1 = max(s2_1, 0.0);
	s2_2 = max(s2_2, 0.0);
	s2_3 = max(s2_3, 0.0);
	s2_4 = max(s2_4, 0.0);
	s2_5 = max(s2_5, 0.0);
	s2_6 = max(s2_6, 0.0);
	s2_7 = max(s2_7, 0.0);
	s2_8 = max(s2_8, 0.0);

	// Source 2: s4_* = non-negative part, s5_* = non-positive part.
	V4 s4_0 = l2(-1.0, -1.0);
	V4 s4_1 = l2(0.0, -1.0);
	V4 s4_2 = l2(1.0, -1.0);
	V4 s4_3 = l2(-1.0, 0.0);
	V4 s4_4 = l2(0.0, 0.0);
	V4 s4_5 = l2(1.0, 0.0);
	V4 s4_6 = l2(-1.0, 1.0);
	V4 s4_7 = l2(0.0, 1.0);
	V4 s4_8 = l2(1.0, 1.0);
	V4 s5_0 = -max(-s4_0, 0.0);
	V4 s5_1 = -max(-s4_1, 0.0);
	V4 s5_2 = -max(-s4_2, 0.0);
	V4 s5_3 = -max(-s4_3, 0.0);
	V4 s5_4 = -max(-s4_4, 0.0);
	V4 s5_5 = -max(-s4_5, 0.0);
	V4 s5_6 = -max(-s4_6, 0.0);
	V4 s5_7 = -max(-s4_7, 0.0);
	V4 s5_8 = -max(-s4_8, 0.0);
	s4_0 = max(s4_0, 0.0);
	s4_1 = max(s4_1, 0.0);
	s4_2 = max(s4_2, 0.0);
	s4_3 = max(s4_3, 0.0);
	s4_4 = max(s4_4, 0.0);
	s4_5 = max(s4_5, 0.0);
	s4_6 = max(s4_6, 0.0);
	s4_7 = max(s4_7, 0.0);
	s4_8 = max(s4_8, 0.0);

	// Source 3: s6_* = non-negative part, s7_* = non-positive part.
	V4 s6_0 = l3(-1.0, -1.0);
	V4 s6_1 = l3(0.0, -1.0);
	V4 s6_2 = l3(1.0, -1.0);
	V4 s6_3 = l3(-1.0, 0.0);
	V4 s6_4 = l3(0.0, 0.0);
	V4 s6_5 = l3(1.0, 0.0);
	V4 s6_6 = l3(-1.0, 1.0);
	V4 s6_7 = l3(0.0, 1.0);
	V4 s6_8 = l3(1.0, 1.0);
	V4 s7_0 = -max(-s6_0, 0.0);
	V4 s7_1 = -max(-s6_1, 0.0);
	V4 s7_2 = -max(-s6_2, 0.0);
	V4 s7_3 = -max(-s6_3, 0.0);
	V4 s7_4 = -max(-s6_4, 0.0);
	V4 s7_5 = -max(-s6_5, 0.0);
	V4 s7_6 = -max(-s6_6, 0.0);
	V4 s7_7 = -max(-s6_7, 0.0);
	V4 s7_8 = -max(-s6_8, 0.0);
	s6_0 = max(s6_0, 0.0);
	s6_1 = max(s6_1, 0.0);
	s6_2 = max(s6_2, 0.0);
	s6_3 = max(s6_3, 0.0);
	s6_4 = max(s6_4, 0.0);
	s6_5 = max(s6_5, 0.0);
	s6_6 = max(s6_6, 0.0);
	s6_7 = max(s6_7, 0.0);
	s6_8 = max(s6_8, 0.0);

	// Every f* receives the same 72 inputs in the same order; each result is
	// written to its own texture at this thread's pixel.
	t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8);
	t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8);
	t2[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8);
	t3[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8);
}

//!PASS 10
//!DESC out-shuffle
//!BLOCK_SIZE 16
//!NUM_THREADS 64
//!IN INPUT, t0, t1, t2, t3
//!OUT OUTPUT

// Tap accessors for pass 10: lN(x, y) reads texture tN through the O macro,
// i.e. with the point sampler SP at `pos + float2(x, y) * pt`. They therefore
// require locals named `pos` and `pt` to be in scope at the point of use.
#define l0(x, y) V4(O(t0, float2(x, y)))
#define l1(x, y) V4(O(t1, float2(x, y)))
#define l2(x, y) V4(O(t2, float2(x, y)))
#define l3(x, y) V4(O(t3, float2(x, y)))

// Output layer of pass 10 (out-shuffle).
//
// Accumulates 72 vector-by-4x4-matrix products (8 input groups x 9 taps)
// with constant weights, adds a constant bias, and returns tanh of the sum,
// so every component of the result lies in [-1, 1].
//
// Parameters are named sG_T: G (0..7) is the input group, T (0..8) the tap.
// As called from Pass10, groups 2k / 2k+1 hold the non-negative /
// non-positive parts of texture t<k>, and taps run row by row from offset
// (-1,-1) to (1,1).
//
// Pass10 adds the four returned components to the luma of a 2x2 block of
// output pixels.
//
// NOTE(review): the sum is built in min16float; reordering the statements
// may change rounding, so keep the accumulation order as generated.
V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) {
	V4 r = 0.0;
	// Group 0 (s0_*): 9 taps.
	r += mul(s0_0, M4(8.288e-03, -1.321e-02, -2.106e-02, -1.756e-02, 3.676e-02, 1.095e-02, 1.181e-02, -2.697e-03, 4.596e-02, 2.905e-02, 1.476e-02, 2.231e-02, 2.701e-03, 3.902e-03, -3.031e-03, 8.976e-03));
	r += mul(s0_1, M4(3.357e-02, 4.993e-02, 2.113e-02, 2.618e-02, -5.839e-02, 4.692e-03, 2.007e-02, 1.737e-02, 1.132e-02, 4.016e-02, -2.122e-02, -2.512e-02, 2.428e-02, 2.614e-02, -5.047e-02, 1.221e-02));
	r += mul(s0_2, M4(1.581e-03, 2.179e-02, -2.680e-02, -2.460e-02, 1.697e-03, -3.966e-02, -2.177e-03, 2.177e-03, 3.197e-03, 2.340e-02, -2.558e-03, 1.068e-02, 2.225e-02, 4.831e-04, 2.057e-02, -3.325e-02));
	r += mul(s0_3, M4(-8.245e-03, 1.447e-02, 4.863e-02, 3.704e-02, 9.376e-03, -1.558e-02, 4.408e-02, 5.230e-02, -1.237e-01, 2.204e-02, -4.083e-02, -7.021e-02, -3.404e-02, 2.196e-02, -5.405e-02, 1.713e-02));
	r += mul(s0_4, M4(-1.691e-02, -5.413e-02, 1.412e-02, -4.122e-02, 3.281e-02, 1.344e-01, -9.106e-02, 5.237e-02, 7.495e-02, -2.874e-01, 1.509e-01, 9.939e-02, 6.605e-02, 1.485e-02, 2.092e-01, -3.814e-02));
	r += mul(s0_5, M4(3.302e-02, 9.497e-03, 5.188e-02, 7.025e-02, -3.038e-02, -4.289e-02, 8.231e-03, -6.615e-02, -3.845e-02, 6.910e-02, -1.664e-03, 1.457e-02, -3.180e-02, -7.829e-02, -7.862e-02, -4.652e-02));
	r += mul(s0_6, M4(9.334e-03, -2.657e-02, -6.000e-02, -5.408e-02, 3.188e-02, -1.277e-02, 1.829e-02, -6.218e-02, 1.772e-02, 7.507e-03, -1.788e-02, 9.071e-03, 8.863e-03, -9.756e-03, -8.313e-04, -1.364e-02));
	r += mul(s0_7, M4(-6.391e-02, 1.455e-02, -4.018e-03, -1.183e-02, -1.013e-01, -3.137e-02, -4.261e-02, 2.507e-02, -5.843e-03, 5.284e-02, -7.524e-02, -2.047e-02, -6.055e-02, 2.008e-02, 1.745e-02, 6.396e-02));
	r += mul(s0_8, M4(9.589e-03, -7.702e-03, -1.385e-02, -3.273e-03, 7.148e-03, -2.582e-02, -1.582e-02, -4.480e-02, 1.874e-02, 7.253e-03, -7.904e-03, -2.832e-02, 9.305e-03, -1.280e-02, -3.018e-02, 9.239e-03));
	// Group 1 (s1_*).
	r += mul(s1_0, M4(-2.402e-03, -1.702e-02, -1.430e-02, -1.437e-02, 3.541e-02, 1.098e-02, 1.285e-02, -4.486e-03, -3.848e-03, 1.516e-02, 1.246e-02, 2.232e-02, -1.736e-02, -6.417e-03, -5.782e-03, 1.797e-02));
	r += mul(s1_1, M4(2.029e-02, 3.807e-02, 3.010e-02, 3.850e-02, -6.865e-02, -1.083e-02, 2.365e-02, 1.968e-02, -1.490e-02, -2.014e-02, -8.668e-03, -2.748e-02, -3.469e-04, 9.138e-02, -1.043e-01, 2.116e-02));
	r += mul(s1_2, M4(-7.515e-03, 3.076e-03, -2.357e-02, -1.654e-02, -4.330e-03, -3.571e-02, -3.355e-03, 4.346e-04, 5.423e-03, 9.114e-03, 3.344e-03, 2.062e-02, 2.949e-02, -7.357e-02, 1.904e-02, -2.418e-02));
	r += mul(s1_3, M4(-3.964e-02, 1.319e-02, 1.290e-02, 1.144e-02, 3.125e-02, -1.283e-02, 5.506e-02, 5.888e-02, -7.126e-03, 2.046e-03, 2.282e-02, -3.216e-02, 1.672e-02, 9.030e-03, -5.734e-02, 3.116e-02));
	r += mul(s1_4, M4(3.315e-01, 5.508e-02, -1.405e-01, -1.298e-01, 3.390e-02, 1.265e-01, -1.198e-01, 1.523e-02, 1.206e-01, -1.592e-02, -1.558e-02, 4.463e-03, -5.783e-02, 1.140e-03, 6.348e-01, -2.935e-01));
	r += mul(s1_5, M4(2.099e-02, 1.821e-01, 5.430e-02, -2.893e-03, -2.511e-02, -3.087e-02, 6.097e-03, -5.772e-02, -1.947e-02, 1.794e-02, -3.531e-03, -2.538e-02, -1.085e-01, 1.207e-01, -1.402e-01, -1.096e-01));
	r += mul(s1_6, M4(3.216e-02, -2.144e-02, -6.793e-02, -5.081e-02, 4.411e-02, -7.452e-03, 3.820e-02, -4.924e-02, -2.191e-02, -3.942e-03, -6.666e-02, -3.475e-03, 1.302e-02, -1.511e-02, -1.236e-02, -1.293e-02));
	r += mul(s1_7, M4(5.775e-03, 6.997e-02, -2.382e-01, -1.491e-01, -8.038e-02, -1.730e-02, -1.200e-02, 4.096e-02, -4.595e-02, -1.271e-02, 6.949e-02, 5.294e-03, -5.210e-02, 3.667e-03, -6.985e-02, 7.442e-02));
	r += mul(s1_8, M4(2.313e-02, 2.815e-02, -3.577e-03, -8.620e-02, 8.754e-03, -1.742e-02, -1.739e-02, -2.487e-02, 4.649e-03, -1.378e-02, -1.452e-02, 2.420e-02, -6.308e-04, -1.359e-02, -5.233e-02, 1.669e-02));
	// Group 2 (s2_*).
	r += mul(s2_0, M4(-3.368e-02, -3.980e-03, -9.703e-04, 1.080e-02, 9.988e-03, -1.531e-02, -4.364e-03, -1.666e-02, 3.106e-03, -5.797e-03, -9.667e-03, -1.150e-02, -2.375e-02, 2.223e-02, 2.287e-02, 2.405e-02));
	r += mul(s2_1, M4(5.779e-03, -2.728e-02, -3.430e-03, -2.802e-02, 6.992e-03, 5.027e-03, -1.433e-02, -1.959e-02, -2.256e-02, 2.146e-02, -1.511e-03, 1.838e-02, -4.855e-03, -6.710e-02, 5.815e-03, -5.167e-03));
	r += mul(s2_2, M4(-1.631e-02, -4.753e-03, 6.240e-03, 2.216e-02, -7.764e-04, 2.601e-02, -1.759e-02, -1.272e-02, -9.234e-03, 1.193e-02, -1.557e-02, 9.221e-03, 3.824e-03, 1.605e-02, -1.521e-03, 4.219e-03));
	r += mul(s2_3, M4(5.929e-02, -1.351e-02, -3.640e-02, -7.495e-02, 2.995e-04, -5.310e-03, 9.308e-03, 5.831e-03, -3.071e-02, 3.279e-02, 2.226e-02, 2.718e-02, -7.876e-02, -1.108e-02, -1.091e-01, -3.100e-03));
	r += mul(s2_4, M4(2.597e-01, 2.576e-01, -1.353e-01, -3.268e-02, -1.675e-02, -1.212e-02, -1.013e-01, 7.113e-03, -1.483e-01, -6.736e-02, -1.158e-01, -5.368e-02, -2.017e-02, -1.257e-01, -7.094e-02, -2.066e-01));
	r += mul(s2_5, M4(-6.604e-03, 4.490e-02, -1.689e-02, -8.739e-02, -1.825e-02, 2.317e-02, -7.293e-03, 6.325e-02, -3.538e-02, 2.314e-01, -3.612e-03, 9.028e-02, 1.150e-02, 3.354e-02, 2.008e-02, 4.114e-02));
	r += mul(s2_6, M4(-6.085e-02, 8.006e-03, 5.597e-03, 4.375e-03, 5.102e-02, -7.769e-03, 5.785e-02, -3.648e-02, 2.499e-02, -1.236e-02, -2.100e-02, -9.887e-03, 1.139e-02, 1.272e-02, 9.081e-03, 2.087e-03));
	r += mul(s2_7, M4(-2.607e-02, -9.075e-02, 3.944e-02, 8.550e-02, 1.373e-02, 2.031e-02, 1.814e-02, -7.935e-02, -4.653e-02, 8.520e-02, -8.139e-02, 5.661e-02, 1.263e-02, 1.954e-02, 2.187e-02, 3.499e-02));
	r += mul(s2_8, M4(-1.408e-02, -2.322e-02, -2.956e-02, -5.237e-02, -3.299e-02, -6.760e-03, 3.166e-02, 9.487e-02, 4.560e-03, 5.893e-03, -5.681e-02, 1.580e-01, 1.593e-03, 1.052e-03, 5.417e-03, 1.003e-02));
	// Group 3 (s3_*).
	r += mul(s3_0, M4(-1.585e-02, -6.165e-03, -6.595e-03, 6.867e-03, 2.167e-02, -1.104e-02, -5.165e-03, -1.270e-02, -1.542e-03, -9.897e-04, -9.215e-03, -7.713e-03, -3.071e-02, 2.166e-02, 2.315e-02, 2.699e-02));
	r += mul(s3_1, M4(2.748e-02, -9.194e-03, -9.189e-03, -3.307e-02, 2.411e-02, 8.146e-03, -1.984e-02, -1.898e-02, -1.813e-02, 4.340e-03, 5.052e-03, 1.439e-02, -1.342e-02, -7.745e-02, 4.404e-03, -8.122e-03));
	r += mul(s3_2, M4(-9.177e-03, 1.751e-02, 4.400e-03, 2.236e-02, 4.106e-03, -8.324e-03, -2.258e-02, -2.498e-03, 4.850e-03, -1.186e-02, -1.428e-02, -8.462e-03, 1.590e-03, 1.123e-02, -6.362e-03, 1.537e-03));
	r += mul(s3_3, M4(1.300e-02, -2.392e-02, 1.641e-02, -5.358e-02, 2.303e-02, 9.964e-04, 2.607e-02, 1.629e-02, -4.236e-02, 3.306e-02, 1.082e-02, 3.368e-02, -9.061e-02, -1.555e-02, -1.254e-01, -9.231e-03));
	r += mul(s3_4, M4(1.093e-02, 6.117e-02, -2.334e-02, 7.731e-02, -1.889e-01, -4.521e-02, -1.381e-01, 4.990e-02, -3.273e-03, -1.392e-01, -5.917e-02, -1.084e-01, -4.124e-02, -1.464e-01, -8.814e-02, -2.305e-01));
	r += mul(s3_5, M4(2.954e-03, -2.640e-02, -1.878e-02, -4.553e-02, -1.245e-01, 3.719e-01, 7.327e-02, -8.666e-02, 7.281e-02, 6.134e-02, 6.567e-02, 7.487e-03, 7.990e-03, 2.353e-02, 1.700e-02, 3.026e-02));
	r += mul(s3_6, M4(-1.941e-02, 4.529e-03, -4.042e-02, 2.598e-03, 4.582e-02, -7.800e-03, 3.671e-02, -2.995e-02, 1.575e-02, -9.918e-03, -3.626e-02, -7.104e-03, 1.194e-02, 1.250e-02, 1.155e-03, -5.683e-04));
	r += mul(s3_7, M4(1.277e-02, -1.435e-02, 1.008e-01, 3.798e-02, -1.700e-03, 2.830e-02, 2.744e-01, -1.602e-01, -3.744e-02, 9.253e-02, 9.984e-03, 2.357e-02, 7.448e-03, 1.759e-02, 1.404e-02, 2.423e-02));
	r += mul(s3_8, M4(-2.164e-02, -2.065e-02, -2.798e-02, 4.532e-03, -2.055e-02, -4.854e-02, -2.606e-02, 1.503e-02, 2.083e-02, -1.722e-02, 1.066e-02, 5.132e-02, 3.575e-04, -2.594e-03, 2.816e-03, 5.034e-03));
	// Group 4 (s4_*).
	r += mul(s4_0, M4(-1.426e-02, 1.257e-02, 1.033e-02, 1.935e-02, -3.030e-02, 1.229e-02, -4.250e-03, 6.165e-03, -1.997e-03, -9.436e-03, -2.618e-02, -1.437e-02, -1.190e-02, -2.632e-02, 2.175e-02, 2.317e-02));
	r += mul(s4_1, M4(2.095e-01, -5.779e-02, 5.158e-02, 2.231e-03, -4.922e-02, -8.571e-02, -4.875e-02, 2.141e-02, 9.009e-02, -1.725e-02, -5.313e-02, -2.566e-02, 1.735e-02, 4.925e-02, 1.420e-02, 1.780e-02));
	r += mul(s4_2, M4(-9.185e-02, -5.607e-02, -8.766e-02, -1.148e-01, -3.548e-03, -2.415e-02, -2.607e-02, -2.083e-02, 1.337e-03, 5.432e-02, 6.464e-03, 3.675e-03, 5.501e-03, -3.110e-03, 1.074e-02, 2.205e-02));
	r += mul(s4_3, M4(3.402e-02, -2.865e-02, -1.933e-02, -3.812e-02, -4.599e-02, 3.587e-03, -5.386e-02, 4.163e-02, 6.190e-02, -1.407e-02, 1.190e-02, -6.332e-03, 1.268e-01, 7.919e-03, -1.616e-01, 9.103e-03));
	r += mul(s4_4, M4(-1.923e-02, 1.850e-02, 2.046e-01, -1.099e-01, 1.178e-01, 2.343e-01, 2.749e-01, -1.415e-01, -3.663e-01, 2.106e-01, 1.741e-01, 7.106e-02, 2.185e-01, 2.617e-01, -2.407e-01, -4.685e-01));
	r += mul(s4_5, M4(-7.289e-02, 1.753e-02, -6.698e-02, 1.516e-01, -2.065e-02, -4.328e-02, -4.114e-02, -1.211e-02, 4.070e-02, 5.314e-02, -3.458e-02, -2.126e-01, -3.308e-03, 6.276e-02, -2.422e-03, 5.390e-03));
	r += mul(s4_6, M4(-2.044e-02, 9.857e-03, -5.566e-03, 1.947e-02, -2.924e-03, -2.792e-02, -2.582e-02, -1.406e-02, 4.819e-02, -3.570e-02, 5.594e-02, -4.488e-02, 1.468e-02, -1.481e-02, 2.532e-02, 4.259e-03));
	r += mul(s4_7, M4(4.942e-02, -2.351e-02, 6.075e-03, -4.373e-03, -1.144e-02, 5.010e-03, -1.145e-03, 3.259e-02, -1.478e-03, -5.162e-03, 4.193e-03, -8.413e-02, -4.683e-02, 2.837e-03, -2.870e-02, -3.504e-02));
	r += mul(s4_8, M4(-9.462e-03, 3.463e-04, -3.184e-02, -2.237e-02, -5.753e-03, -8.946e-03, -1.983e-02, 1.403e-02, 3.208e-04, -6.783e-03, 2.986e-02, 9.583e-02, 9.652e-03, -1.106e-02, 2.724e-02, 4.058e-02));
	// Group 5 (s5_*).
	r += mul(s5_0, M4(-4.553e-03, 1.354e-02, 1.294e-02, 2.198e-02, -3.796e-02, 1.777e-03, -1.345e-02, -2.558e-03, 6.565e-03, -8.288e-03, -1.664e-02, -1.294e-02, -5.033e-03, -5.418e-03, 3.318e-03, 9.769e-03));
	r += mul(s5_1, M4(2.888e-03, -1.481e-02, 1.988e-02, -5.457e-03, -3.176e-02, -4.046e-02, -3.968e-02, -7.777e-03, 3.050e-03, 1.551e-02, -4.423e-02, -2.049e-02, -2.785e-03, 8.283e-03, 8.081e-03, 5.535e-03));
	r += mul(s5_2, M4(-7.639e-03, 5.054e-03, -6.512e-02, -4.601e-02, -1.182e-02, -4.087e-02, -1.517e-02, -3.973e-02, 1.923e-02, -3.636e-03, 2.031e-02, -7.553e-03, 6.687e-03, 1.134e-02, 6.916e-03, 1.592e-02));
	r += mul(s5_3, M4(5.164e-02, -3.007e-02, 6.146e-04, -4.163e-02, -2.801e-02, 7.026e-03, -2.300e-02, 1.935e-02, -3.085e-03, -6.896e-03, 2.654e-02, -5.054e-03, -1.381e-02, 3.617e-03, -2.664e-02, -2.407e-03));
	r += mul(s5_4, M4(-5.210e-02, 6.646e-02, -5.044e-02, -2.083e-02, 1.804e-01, 6.674e-02, 1.508e-01, 7.434e-03, 7.070e-02, 1.608e-02, -3.052e-02, 1.301e-03, 1.424e-02, -3.447e-02, 8.955e-03, -5.847e-02));
	r += mul(s5_5, M4(2.626e-03, -2.935e-02, 9.258e-02, 9.205e-02, -2.765e-02, 1.139e-03, -2.845e-02, -1.555e-02, -5.127e-02, 1.636e-02, -3.642e-02, 4.822e-02, -1.547e-02, 1.102e-02, -5.527e-03, 7.313e-03));
	r += mul(s5_6, M4(-2.205e-02, 7.127e-03, 3.632e-03, 1.646e-02, -1.045e-02, -9.007e-03, -3.836e-02, -2.190e-02, 2.273e-02, -3.534e-03, 1.640e-02, -4.160e-02, 1.927e-02, -9.104e-03, 3.589e-02, -3.850e-03));
	r += mul(s5_7, M4(3.426e-02, -1.793e-02, 2.421e-02, 1.227e-02, -1.653e-02, 1.187e-02, 2.618e-02, 1.769e-03, -5.266e-02, -9.613e-03, 7.193e-03, 8.483e-03, -1.251e-02, 1.625e-02, -4.461e-02, -3.639e-03));
	r += mul(s5_8, M4(-5.050e-03, 1.453e-02, -2.586e-02, -2.142e-02, -5.868e-03, -6.412e-03, -1.996e-02, 1.883e-02, -3.762e-04, -2.114e-02, 1.910e-02, 1.897e-02, 7.753e-03, -1.023e-03, 1.424e-02, 4.169e-03));
	// Group 6 (s6_*).
	r += mul(s6_0, M4(-3.845e-03, 1.689e-02, 5.021e-03, 1.254e-02, -1.487e-03, 3.818e-02, -1.402e-02, 2.054e-02, -3.812e-03, 2.115e-02, 2.435e-02, 2.093e-02, 1.783e-02, -4.674e-03, -3.655e-03, 3.153e-03));
	r += mul(s6_1, M4(5.142e-03, -6.075e-02, 1.070e-02, -2.858e-02, 7.935e-02, -9.109e-02, 4.375e-02, 1.193e-02, -9.096e-02, -7.053e-02, -1.485e-02, -7.337e-03, 1.038e-01, 3.523e-02, 1.287e-02, 3.620e-02));
	r += mul(s6_2, M4(-1.055e-02, 3.804e-02, -1.936e-02, 2.036e-02, -9.637e-03, 2.061e-02, -1.709e-02, 1.762e-02, 3.860e-03, -3.284e-02, 8.486e-03, 8.479e-03, 4.988e-03, 2.713e-02, -1.273e-02, -2.067e-02));
	r += mul(s6_3, M4(2.856e-03, 1.376e-02, 1.553e-02, 1.966e-02, 6.574e-02, 1.731e-02, 9.099e-02, 4.415e-02, 8.429e-04, -9.494e-03, -2.708e-03, -2.939e-03, 1.001e-01, -7.926e-03, 3.623e-02, 6.166e-02));
	r += mul(s6_4, M4(-5.182e-02, -8.474e-03, -7.333e-02, -3.194e-02, 2.440e-01, -3.135e-01, 1.841e-01, -3.585e-01, 1.917e-01, 1.126e-01, -7.704e-02, -6.519e-02, -4.877e-01, 1.928e-01, 4.962e-02, -2.862e-01));
	r += mul(s6_5, M4(7.056e-02, -4.759e-03, 7.693e-02, -1.180e-02, -1.746e-03, 7.848e-03, 2.640e-02, 1.388e-02, -4.918e-02, -9.407e-03, -1.582e-02, -4.991e-02, 2.390e-02, 1.084e-02, -5.813e-02, 9.644e-02));
	r += mul(s6_6, M4(1.613e-02, -9.781e-03, -1.865e-02, -4.360e-03, -8.337e-03, 5.040e-03, -1.687e-02, 4.597e-04, -3.722e-02, 1.425e-02, -1.447e-02, 2.435e-02, 1.344e-02, -1.216e-02, -3.054e-04, -3.275e-02));
	r += mul(s6_7, M4(-9.062e-03, 3.863e-02, 4.212e-02, -2.035e-04, -1.452e-02, 9.739e-03, 8.968e-02, -7.169e-02, 2.987e-02, -2.008e-02, 1.045e-01, 7.690e-02, 3.761e-02, -3.235e-02, 1.704e-01, 9.325e-02));
	r += mul(s6_8, M4(-1.967e-02, -2.017e-02, -4.216e-02, 7.447e-03, -5.092e-03, 2.067e-03, -2.545e-02, -7.547e-03, -1.169e-02, -9.225e-03, -1.654e-02, 1.105e-02, 1.381e-02, 1.372e-02, 1.354e-02, -1.234e-01));
	// Group 7 (s7_*).
	r += mul(s7_0, M4(-3.337e-03, 8.507e-03, -3.598e-03, 5.681e-03, 1.162e-02, 1.914e-02, -2.418e-03, 8.949e-03, -5.832e-03, 1.836e-02, 2.189e-02, 1.753e-02, 2.472e-02, -1.509e-03, -8.574e-03, -5.146e-03));
	r += mul(s7_1, M4(5.323e-02, -1.037e-02, 9.631e-03, -2.247e-02, -1.439e-02, -1.350e-02, 4.510e-02, -2.726e-03, -3.537e-02, -4.546e-02, -4.999e-03, -8.271e-03, 3.596e-02, 4.316e-02, 2.662e-02, 3.942e-02));
	r += mul(s7_2, M4(-7.218e-03, -6.411e-03, -1.728e-02, 3.503e-02, -3.096e-03, -4.575e-03, -5.780e-03, 6.029e-03, -1.083e-02, -5.370e-03, 1.031e-02, 1.510e-02, 2.472e-03, 9.614e-03, -2.293e-04, -1.862e-02));
	r += mul(s7_3, M4(3.626e-02, 9.689e-03, 4.372e-02, 9.929e-03, -1.615e-03, 1.196e-02, 3.210e-02, 4.786e-02, -9.144e-03, 6.885e-03, -1.638e-03, -5.414e-03, 1.300e-02, 1.148e-02, -4.992e-03, 5.277e-02));
	r += mul(s7_4, M4(-2.144e-01, 8.854e-02, -5.507e-02, 9.481e-02, 9.738e-03, -1.262e-02, -9.155e-02, -4.065e-02, 2.764e-01, 8.610e-02, -2.896e-01, -1.021e-01, -8.618e-02, -3.892e-02, -3.471e-02, -9.937e-02));
	r += mul(s7_5, M4(1.219e-01, -1.227e-01, 9.791e-02, -1.922e-01, 1.401e-02, 1.540e-02, 3.960e-02, 1.178e-02, -1.002e-02, 1.329e-01, -2.955e-02, -2.166e-01, 2.488e-03, -8.343e-04, -3.448e-02, -3.006e-03));
	r += mul(s7_6, M4(1.627e-02, -1.631e-02, -8.284e-03, -1.098e-02, 4.314e-03, -3.418e-03, -1.261e-02, -1.564e-02, -4.235e-02, 2.073e-02, 2.123e-02, 1.715e-02, 1.017e-02, -4.602e-04, 1.562e-02, -2.705e-02));
	r += mul(s7_7, M4(-1.332e-02, 3.030e-02, -7.299e-02, 2.072e-03, -2.778e-02, -2.832e-03, 8.620e-03, 4.325e-03, -7.304e-04, -4.965e-02, 1.513e-01, 1.655e-01, 6.935e-05, -3.351e-02, 8.485e-02, -4.985e-03));
	r += mul(s7_8, M4(-1.102e-02, -6.604e-03, -1.332e-02, 2.311e-02, -3.667e-03, -1.546e-02, -1.896e-02, -2.554e-02, -1.369e-02, -2.191e-02, -2.209e-02, -7.603e-03, -1.588e-02, -3.563e-03, -2.569e-02, 5.890e-02));
	// Bias.
	r += V4(4.986e-04, 3.006e-04, 5.437e-04, 6.561e-04);
	// tanh bounds each of the four components to [-1, 1].
	return tanh(r);
}

// Pass 10 entry point (out-shuffle): one thread produces a 2x2 block of
// OUTPUT pixels.
//
// It reads a 3x3 neighbourhood from each of t0..t3, splits every tap into
// its non-negative and non-positive parts, and evaluates f0 once to get four
// values. For each of the four output pixels it samples INPUT with the
// linear sampler SL, converts to YUV, adds one f0 component to the luma
// (clamped to [0, 1]), leaves chroma untouched, converts back to RGB and
// writes the pixel with alpha 1.
//
//   blockStart - offset added to the per-thread output coordinate.
//   tid        - only tid.x is used; it is passed to Rmp8x8 (presumably an
//                8x8 tile remap of the linear thread index - confirm against
//                the Magpie runtime).
void Pass10(uint2 blockStart, uint3 tid) {
	// NOTE: the names `pt` and `pos` must not change; the O(t, p) macro that
	// l0..l3 expand to refers to them textually.
	float2 pt = float2(GetInputPt());
	// The per-thread coordinate is doubled, so each thread starts at an
	// even output coordinate (relative to blockStart) and owns a 2x2 block.
	uint2 gxy = (Rmp8x8(tid.x) << 1) + blockStart;
	uint2 size = GetOutputSize();
	// Only the first pixel of the 2x2 block is tested against the output size.
	if (gxy.x >= size.x || gxy.y >= size.y) {
		return;
	}
	// Centre of the half-resolution pixel (gxy >> 1), scaled by `pt`; this is
	// the sampling position used for the t0..t3 taps.
	float2 pos = ((gxy >> 1) + 0.5) * pt;

	// t0: taps _0.._8 run row by row from offset (-1,-1) to (1,1).
	V4 s0_0 = l0(-1.0, -1.0);
	V4 s0_1 = l0(0.0, -1.0);
	V4 s0_2 = l0(1.0, -1.0);
	V4 s0_3 = l0(-1.0, 0.0);
	V4 s0_4 = l0(0.0, 0.0);
	V4 s0_5 = l0(1.0, 0.0);
	V4 s0_6 = l0(-1.0, 1.0);
	V4 s0_7 = l0(0.0, 1.0);
	V4 s0_8 = l0(1.0, 1.0);
	// s1_* = -max(-v, 0): the non-positive part of each tap (zero where v > 0).
	V4 s1_0 = -max(-s0_0, 0.0);
	V4 s1_1 = -max(-s0_1, 0.0);
	V4 s1_2 = -max(-s0_2, 0.0);
	V4 s1_3 = -max(-s0_3, 0.0);
	V4 s1_4 = -max(-s0_4, 0.0);
	V4 s1_5 = -max(-s0_5, 0.0);
	V4 s1_6 = -max(-s0_6, 0.0);
	V4 s1_7 = -max(-s0_7, 0.0);
	V4 s1_8 = -max(-s0_8, 0.0);
	// s0_* is then overwritten with the non-negative part (zero where v < 0).
	s0_0 = max(s0_0, 0.0);
	s0_1 = max(s0_1, 0.0);
	s0_2 = max(s0_2, 0.0);
	s0_3 = max(s0_3, 0.0);
	s0_4 = max(s0_4, 0.0);
	s0_5 = max(s0_5, 0.0);
	s0_6 = max(s0_6, 0.0);
	s0_7 = max(s0_7, 0.0);
	s0_8 = max(s0_8, 0.0);

	// t1: s2_* = non-negative part, s3_* = non-positive part.
	V4 s2_0 = l1(-1.0, -1.0);
	V4 s2_1 = l1(0.0, -1.0);
	V4 s2_2 = l1(1.0, -1.0);
	V4 s2_3 = l1(-1.0, 0.0);
	V4 s2_4 = l1(0.0, 0.0);
	V4 s2_5 = l1(1.0, 0.0);
	V4 s2_6 = l1(-1.0, 1.0);
	V4 s2_7 = l1(0.0, 1.0);
	V4 s2_8 = l1(1.0, 1.0);
	V4 s3_0 = -max(-s2_0, 0.0);
	V4 s3_1 = -max(-s2_1, 0.0);
	V4 s3_2 = -max(-s2_2, 0.0);
	V4 s3_3 = -max(-s2_3, 0.0);
	V4 s3_4 = -max(-s2_4, 0.0);
	V4 s3_5 = -max(-s2_5, 0.0);
	V4 s3_6 = -max(-s2_6, 0.0);
	V4 s3_7 = -max(-s2_7, 0.0);
	V4 s3_8 = -max(-s2_8, 0.0);
	s2_0 = max(s2_0, 0.0);
	s2_1 = max(s2_1, 0.0);
	s2_2 = max(s2_2, 0.0);
	s2_3 = max(s2_3, 0.0);
	s2_4 = max(s2_4, 0.0);
	s2_5 = max(s2_5, 0.0);
	s2_6 = max(s2_6, 0.0);
	s2_7 = max(s2_7, 0.0);
	s2_8 = max(s2_8, 0.0);

	// t2: s4_* = non-negative part, s5_* = non-positive part.
	V4 s4_0 = l2(-1.0, -1.0);
	V4 s4_1 = l2(0.0, -1.0);
	V4 s4_2 = l2(1.0, -1.0);
	V4 s4_3 = l2(-1.0, 0.0);
	V4 s4_4 = l2(0.0, 0.0);
	V4 s4_5 = l2(1.0, 0.0);
	V4 s4_6 = l2(-1.0, 1.0);
	V4 s4_7 = l2(0.0, 1.0);
	V4 s4_8 = l2(1.0, 1.0);
	V4 s5_0 = -max(-s4_0, 0.0);
	V4 s5_1 = -max(-s4_1, 0.0);
	V4 s5_2 = -max(-s4_2, 0.0);
	V4 s5_3 = -max(-s4_3, 0.0);
	V4 s5_4 = -max(-s4_4, 0.0);
	V4 s5_5 = -max(-s4_5, 0.0);
	V4 s5_6 = -max(-s4_6, 0.0);
	V4 s5_7 = -max(-s4_7, 0.0);
	V4 s5_8 = -max(-s4_8, 0.0);
	s4_0 = max(s4_0, 0.0);
	s4_1 = max(s4_1, 0.0);
	s4_2 = max(s4_2, 0.0);
	s4_3 = max(s4_3, 0.0);
	s4_4 = max(s4_4, 0.0);
	s4_5 = max(s4_5, 0.0);
	s4_6 = max(s4_6, 0.0);
	s4_7 = max(s4_7, 0.0);
	s4_8 = max(s4_8, 0.0);

	// t3: s6_* = non-negative part, s7_* = non-positive part.
	V4 s6_0 = l3(-1.0, -1.0);
	V4 s6_1 = l3(0.0, -1.0);
	V4 s6_2 = l3(1.0, -1.0);
	V4 s6_3 = l3(-1.0, 0.0);
	V4 s6_4 = l3(0.0, 0.0);
	V4 s6_5 = l3(1.0, 0.0);
	V4 s6_6 = l3(-1.0, 1.0);
	V4 s6_7 = l3(0.0, 1.0);
	V4 s6_8 = l3(1.0, 1.0);
	V4 s7_0 = -max(-s6_0, 0.0);
	V4 s7_1 = -max(-s6_1, 0.0);
	V4 s7_2 = -max(-s6_2, 0.0);
	V4 s7_3 = -max(-s6_3, 0.0);
	V4 s7_4 = -max(-s6_4, 0.0);
	V4 s7_5 = -max(-s6_5, 0.0);
	V4 s7_6 = -max(-s6_6, 0.0);
	V4 s7_7 = -max(-s6_7, 0.0);
	V4 s7_8 = -max(-s6_8, 0.0);
	s6_0 = max(s6_0, 0.0);
	s6_1 = max(s6_1, 0.0);
	s6_2 = max(s6_2, 0.0);
	s6_3 = max(s6_3, 0.0);
	s6_4 = max(s6_4, 0.0);
	s6_5 = max(s6_5, 0.0);
	s6_6 = max(s6_6, 0.0);
	s6_7 = max(s6_7, 0.0);
	s6_8 = max(s6_8, 0.0);

	// r holds one luma offset per pixel of the 2x2 output block:
	// x -> (0,0), y -> (1,0), z -> (0,1), w -> (1,1).
	V4 r = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8);

	// Colour-space matrices (row-major 3x3); the first rgb2yuv row is the
	// luma weighting (0.299, 0.587, 0.114 - the BT.601-style coefficients).
	static const float3x3 rgb2yuv = {0.299, 0.587, 0.114, -0.169, -0.331, 0.5, 0.5, -0.419, -0.081};
	static const float3x3 yuv2rgb = {1, -0.00093, 1.401687, 1, -0.3437, -0.71417, 1, 1.77216, 0.00099};
	float2 opt = float2(GetOutputPt());

	// From here on `pos` is the INPUT sampling position for the output pixel
	// being written. Step back half an output step in both axes to reach the
	// first pixel of the block: offset (0,0), uses r.x.
	pos -= 0.5f * opt;
	float3 yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb);
	OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.x), yuv.yz)), 1);

	// Offset (1,0), uses r.y.
	++gxy.x;
	pos.x += opt.x;
	yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb);
	OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.y), yuv.yz)), 1);

	// Offset (1,1), uses r.w (the block is walked in a loop, so w comes before z).
	++gxy.y;
	pos.y += opt.y;
	yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb);
	OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.w), yuv.yz)), 1);

	// Offset (0,1), uses r.z.
	--gxy.x;
	pos.x -= opt.x;
	yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb);
	OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.z), yuv.yz)), 1);
}
